573 files changed, 15175 insertions, 7275 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 6856da99b6f7..2f1788111cd9 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -259,3 +259,27 @@ Contact: [email protected] Description: Symbolic link to the PHY device this network device is attached to. + +What: /sys/class/net/<iface>/carrier_changes +Date: Mar 2014 +KernelVersion: 3.15 +Contact: [email protected] +Description: + 32-bit unsigned integer counting the number of times the link has + seen a change from UP to DOWN and vice versa + +What: /sys/class/net/<iface>/carrier_up_count +Date: Jan 2018 +KernelVersion: 4.16 +Contact: [email protected] +Description: + 32-bit unsigned integer counting the number of times the link has + been up + +What: /sys/class/net/<iface>/carrier_down_count +Date: Jan 2018 +KernelVersion: 4.16 +Contact: [email protected] +Description: + 32-bit unsigned integer counting the number of times the link has + been down diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 2c4e3354e128..d2fd78f85aa4 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -12,8 +12,8 @@ suitable sample script and configure that. On a dual CPU: ps aux | grep pkt -root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0] -root 130 0.3 0.0 0 0 ? SW 2003 509:50 [pktgen/1] +root 129 0.3 0.0 0 0 ? SW 2003 523:20 [kpktgend_0] +root 130 0.3 0.0 0 0 ? SW 2003 509:50 [kpktgend_1] For monitoring and control pktgen creates: @@ -113,9 +113,16 @@ Configuring devices =================== This is done via the /proc interface, and most easily done via pgset as defined in the sample scripts. +You need to specify PGDEV environment variable to use functions from sample +scripts, i.e.: +export PGDEV=/proc/net/pktgen/eth4@0 +source samples/pktgen/functions.sh Examples: + pg_ctrl start starts injection. + pg_ctrl stop aborts injection. Also, ^C aborts generator. + pgset "clone_skb 1" sets the number of copies of the same packet pgset "clone_skb 0" use single SKB for all transmits pgset "burst 8" uses xmit_more API to queue 8 copies of the same @@ -165,8 +172,12 @@ Examples: IPSEC # IPsec encapsulation (needs CONFIG_XFRM) NODE_ALLOC # node specific memory allocation NO_TIMESTAMP # disable timestamping + pgset 'flag ![name]' Clear a flag to determine behaviour. + Note that you might need to use single quote in + interactive mode, so that your shell wouldn't expand + the specified flag as a history command. - pgset spi SPI_VALUE Set specific SA used to transform packet. + pgset "spi [SPI_VALUE]" Set specific SA used to transform packet. pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then cycle through the port range. @@ -207,8 +218,6 @@ Examples: pgset "tos XX" set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00) pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00) - pgset stop aborts injection. Also, ^C aborts generator. - pgset "rate 300M" set rate to 300 Mb/s pgset "ratep 1000000" set rate to 1Mpps diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 57d3ee9e4bde..fc3ae951bc07 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory) or if no page table is present for the addresses (e.g. when using hugepages). 
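The carrier counters documented in the sysfs ABI hunk above are plain decimal attributes, so no special API is needed to consume them; the following is a minimal userspace sketch, not part of the patch, and the interface name "eth0" is an assumption for illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned int up, down;
		/* Paths follow the ABI entries above; "eth0" is hypothetical. */
		FILE *f = fopen("/sys/class/net/eth0/carrier_up_count", "r");

		if (!f || fscanf(f, "%u", &up) != 1)
			return 1;
		fclose(f);

		f = fopen("/sys/class/net/eth0/carrier_down_count", "r");
		if (!f || fscanf(f, "%u", &down) != 1)
			return 1;
		fclose(f);

		printf("link went up %u times, down %u times\n", up, down);
		return 0;
	}
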
+4.108 KVM_PPC_GET_CPU_CHAR + +Capability: KVM_CAP_PPC_GET_CPU_CHAR +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_ppc_cpu_char (out) +Returns: 0 on successful completion + -EFAULT if struct kvm_ppc_cpu_char cannot be written + +This ioctl gives userspace information about certain characteristics +of the CPU relating to speculative execution of instructions and +possible information leakage resulting from speculative execution (see +CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is +returned in struct kvm_ppc_cpu_char, which looks like this: + +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +For extensibility, the character_mask and behaviour_mask fields +indicate which bits of character and behaviour have been filled in by +the kernel. If the set of defined bits is extended in future then +userspace will be able to tell whether it is running on a kernel that +knows about the new bits. + +The character field describes attributes of the CPU which can help +with preventing inadvertent information disclosure - specifically, +whether there is an instruction to flash-invalidate the L1 data cache +(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set +to a mode where entries can only be used by the thread that created +them, whether the bcctr[l] instruction prevents speculation, and +whether a speculation barrier instruction (ori 31,31,0) is provided. + +The behaviour field describes actions that software should take to +prevent inadvertent information disclosure, and thus describes which +vulnerabilities the hardware is subject to; specifically whether the +L1 data cache should be flushed when returning to user mode from the +kernel, and whether a speculation barrier should be placed between an +array bounds check and the array access. + +These fields use the same bit definitions as the new +H_GET_CPU_CHARACTERISTICS hypercall. + 5. The kvm_run structure ------------------------ diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt index d11eff61fc9a..5cd58439ad2d 100644 --- a/Documentation/x86/pti.txt +++ b/Documentation/x86/pti.txt @@ -78,7 +78,7 @@ this protection comes at a cost: non-PTI SYSCALL entry code, so requires mapping fewer things into the userspace page tables. The downside is that stacks must be switched at entry time. - d. Global pages are disabled for all kernel structures not + c. Global pages are disabled for all kernel structures not mapped into both kernel and userspace page tables. This feature of the MMU allows different processes to share TLB entries mapping the kernel. Losing the feature means more diff --git a/MAINTAINERS b/MAINTAINERS index 3a28cee4f0ce..51e3a0d503dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -62,7 +62,15 @@ trivial patch so apply some common sense. 7. When sending security related changes or reports to a maintainer please Cc: [email protected], especially if the maintainer - does not respond. + does not respond. Please keep in mind that the security team is + a small set of people who can be efficient only when working on + verified bugs. Please only Cc: this list when you have identified + that the bug would present a short-term risk to other users if it + were publicly disclosed. 
For example, reports of address leaks do + not represent an immediate threat and are better handled publicly, + and ideally, should come with a patch proposal. Please do not send + automated reports to this list either. Such bugs will be handled + better and faster in the usual public places. 8. Happy hacking. @@ -9102,6 +9110,7 @@ F: drivers/usb/image/microtek.* MIPS M: Ralf Baechle <[email protected]> +M: James Hogan <[email protected]> W: http://www.linux-mips.org/ T: git git://git.linux-mips.org/pub/scm/ralf/linux.git @@ -12252,7 +12261,7 @@ M: Security Officers <[email protected]> S: Supported SECURITY SUBSYSTEM -M: James Morris <[email protected]> +M: James Morris <[email protected]> M: "Serge E. Hallyn" <[email protected]> L: [email protected] (suggested Cc:) T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git @@ -14771,9 +14780,9 @@ S: Maintained F: drivers/hid/hid-wiimote* WILOCITY WIL6210 WIRELESS DRIVER -M: Maya Erez <[email protected]> +M: Maya Erez <[email protected]> S: Supported W: http://wireless.kernel.org/en/users/Drivers/wil6210 F: drivers/net/wireless/ath/wil6210/ @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = -rc9 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index 37bd6d9b8eb9..a6bdc1da47ad 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -102,6 +102,15 @@ sio_pci_route(void) alpha_mv.sys.sio.route_tab); } +static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev) +{ + if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && + (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + return false; + + return true; +} + static unsigned int __init sio_collect_irq_levels(void) { @@ -110,8 +119,7 @@ sio_collect_irq_levels(void) /* Iterate through the devices, collecting IRQ levels. */ for_each_pci_dev(dev) { - if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && - (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + if (!sio_pci_dev_irq_needs_level(dev)) continue; if (dev->irq) @@ -120,8 +128,7 @@ sio_collect_irq_levels(void) return level_bits; } -static void __init -sio_fixup_irq_levels(unsigned int level_bits) +static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset) { unsigned int old_level_bits; @@ -139,12 +146,21 @@ sio_fixup_irq_levels(unsigned int level_bits) */ old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8); - level_bits |= (old_level_bits & 0x71ff); + if (reset) + old_level_bits &= 0x71ff; + + level_bits |= old_level_bits; outb((level_bits >> 0) & 0xff, 0x4d0); outb((level_bits >> 8) & 0xff, 0x4d1); } +static inline void +sio_fixup_irq_levels(unsigned int level_bits) +{ + __sio_fixup_irq_levels(level_bits, true); +} + static inline int noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { @@ -181,7 +197,14 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5; int irq = COMMON_TABLE_LOOKUP, tmp; tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); - return irq >= 0 ? tmp : -1; + + irq = irq >= 0 ? 
tmp : -1; + + /* Fixup IRQ level if an actual IRQ mapping is detected */ + if (sio_pci_dev_irq_needs_level(dev) && irq >= 0) + __sio_fixup_irq_levels(1 << irq, false); + + return irq; } static inline int diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S index 316a99aa9efe..1cfcfbbea6f0 100644 --- a/arch/alpha/lib/ev6-memset.S +++ b/arch/alpha/lib/ev6-memset.S @@ -18,7 +18,7 @@ * The algorithm for the leading and trailing quadwords remains the same, * however the loop has been unrolled to enable better memory throughput, * and the code has been replicated for each of the entry points: __memset - * and __memsetw to permit better scheduling to eliminate the stalling + * and __memset16 to permit better scheduling to eliminate the stalling * encountered during the mask replication. * A future enhancement might be to put in a byte store loop for really * small (say < 32 bytes) memset()s. Whether or not that change would be @@ -34,7 +34,7 @@ .globl memset .globl __memset .globl ___memset - .globl __memsetw + .globl __memset16 .globl __constant_c_memset .ent ___memset @@ -415,9 +415,9 @@ end: * to mask stalls. Note that entry point names also had to change */ .align 5 - .ent __memsetw + .ent __memset16 -__memsetw: +__memset16: .frame $30,0,$26,0 .prologue 0 @@ -596,8 +596,8 @@ end_w: nop ret $31,($26),1 # L0 : - .end __memsetw - EXPORT_SYMBOL(__memsetw) + .end __memset16 + EXPORT_SYMBOL(__memset16) memset = ___memset __memset = ___memset diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index eed89e659143..a1f4d6d5a569 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -293,12 +293,12 @@ label = "u-boot env"; reg = <0 0x020000>; }; - partition@0x020000 { + partition@20000 { /* The LCDK defaults to booting from this partition */ label = "u-boot"; reg = <0x020000 0x080000>; }; - partition@0x0a0000 { + partition@a0000 { label = "free space"; reg = <0x0a0000 0>; }; diff --git a/arch/arm/boot/dts/imx6q-b450v3.dts b/arch/arm/boot/dts/imx6q-b450v3.dts index 404a93d9596b..3ec58500e9c2 100644 --- a/arch/arm/boot/dts/imx6q-b450v3.dts +++ b/arch/arm/boot/dts/imx6q-b450v3.dts @@ -112,3 +112,55 @@ line-name = "PCA9539-P07"; }; }; + +&pci_root { + /* Intel Corporation I210 Gigabit Network Connection */ + switch_nic: ethernet@3,0 { + compatible = "pci8086,1533"; + reg = <0x00010000 0 0 0 0>; + }; +}; + +&switch_ports { + port@0 { + reg = <0>; + label = "enacq"; + phy-handle = <&switchphy0>; + }; + + port@1 { + reg = <1>; + label = "eneport1"; + phy-handle = <&switchphy1>; + }; + + port@2 { + reg = <2>; + label = "enix"; + phy-handle = <&switchphy2>; + }; + + port@3 { + reg = <3>; + label = "enid"; + phy-handle = <&switchphy3>; + }; + + port@4 { + reg = <4>; + label = "cpu"; + ethernet = <&switch_nic>; + phy-handle = <&switchphy4>; + }; + + port@5 { + reg = <5>; + label = "enembc"; + + /* connected to Ethernet MAC of AT91RM9200 in MII mode */ + fixed-link { + speed = <100>; + full-duplex; + }; + }; +}; diff --git a/arch/arm/boot/dts/imx6q-b650v3.dts b/arch/arm/boot/dts/imx6q-b650v3.dts index 7f9f176901d4..5650a9b11091 100644 --- a/arch/arm/boot/dts/imx6q-b650v3.dts +++ b/arch/arm/boot/dts/imx6q-b650v3.dts @@ -111,3 +111,55 @@ fsl,tx-cal-45-dp-ohms = <55>; fsl,tx-d-cal = <100>; }; + +&pci_root { + /* Intel Corporation I210 Gigabit Network Connection */ + switch_nic: ethernet@3,0 { + compatible = "pci8086,1533"; + reg = <0x00010000 0 0 0 0>; + }; +}; + +&switch_ports { + port@0 { + reg = <0>; + label = "enacq"; + phy-handle = 
<&switchphy0>; + }; + + port@1 { + reg = <1>; + label = "eneport1"; + phy-handle = <&switchphy1>; + }; + + port@2 { + reg = <2>; + label = "enix"; + phy-handle = <&switchphy2>; + }; + + port@3 { + reg = <3>; + label = "enid"; + phy-handle = <&switchphy3>; + }; + + port@4 { + reg = <4>; + label = "cpu"; + ethernet = <&switch_nic>; + phy-handle = <&switchphy4>; + }; + + port@5 { + reg = <5>; + label = "enembc"; + + /* connected to Ethernet MAC of AT91RM9200 in MII mode */ + fixed-link { + speed = <100>; + full-duplex; + }; + }; +}; diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts index 46bdc6722715..35edbdc7bcd1 100644 --- a/arch/arm/boot/dts/imx6q-b850v3.dts +++ b/arch/arm/boot/dts/imx6q-b850v3.dts @@ -212,3 +212,78 @@ }; }; }; + +&pci_root { + /* PLX Technology, Inc. PEX 8605 PCI Express 4-port Gen2 Switch */ + bridge@1,0 { + compatible = "pci10b5,8605"; + reg = <0x00010000 0 0 0 0>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + + bridge@2,1 { + compatible = "pci10b5,8605"; + reg = <0x00020800 0 0 0 0>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + + /* Intel Corporation I210 Gigabit Network Connection */ + ethernet@3,0 { + compatible = "pci8086,1533"; + reg = <0x00030000 0 0 0 0>; + }; + }; + + bridge@2,2 { + compatible = "pci10b5,8605"; + reg = <0x00021000 0 0 0 0>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + + /* Intel Corporation I210 Gigabit Network Connection */ + switch_nic: ethernet@4,0 { + compatible = "pci8086,1533"; + reg = <0x00040000 0 0 0 0>; + }; + }; + }; +}; + +&switch_ports { + port@0 { + reg = <0>; + label = "eneport1"; + phy-handle = <&switchphy0>; + }; + + port@1 { + reg = <1>; + label = "eneport2"; + phy-handle = <&switchphy1>; + }; + + port@2 { + reg = <2>; + label = "enix"; + phy-handle = <&switchphy2>; + }; + + port@3 { + reg = <3>; + label = "enid"; + phy-handle = <&switchphy3>; + }; + + port@4 { + reg = <4>; + label = "cpu"; + ethernet = <&switch_nic>; + phy-handle = <&switchphy4>; + }; +}; diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi index b915837bbb5f..916ea94d75ca 100644 --- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi +++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi @@ -92,6 +92,56 @@ mux-int-port = <1>; mux-ext-port = <4>; }; + + aliases { + mdio-gpio0 = &mdio0; + }; + + mdio0: mdio-gpio { + compatible = "virtual,mdio-gpio"; + gpios = <&gpio2 5 GPIO_ACTIVE_HIGH>, /* mdc */ + <&gpio2 7 GPIO_ACTIVE_HIGH>; /* mdio */ + + #address-cells = <1>; + #size-cells = <0>; + + switch@0 { + compatible = "marvell,mv88e6085"; /* 88e6240*/ + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + switch_ports: ports { + #address-cells = <1>; + #size-cells = <0>; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switchphy0: switchphy@0 { + reg = <0>; + }; + + switchphy1: switchphy@1 { + reg = <1>; + }; + + switchphy2: switchphy@2 { + reg = <2>; + }; + + switchphy3: switchphy@3 { + reg = <3>; + }; + + switchphy4: switchphy@4 { + reg = <4>; + }; + }; + }; + }; }; &ecspi5 { @@ -326,3 +376,15 @@ tcxo-clock-frequency = <26000000>; }; }; + +&pcie { + /* Synopsys, Inc. 
Device */ + pci_root: root@0,0 { + compatible = "pci16c3,abcd"; + reg = <0x00000000 0 0 0 0>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + }; +}; diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts index cf2f5240e176..27cc913ca0f5 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts @@ -53,7 +53,8 @@ }; pinctrl: pin-controller@10000 { - pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>; + pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header + &pmx_gpio_header_gpo>; pinctrl-names = "default"; pmx_uart0: pmx-uart0 { @@ -85,11 +86,16 @@ * ground. */ pmx_gpio_header: pmx-gpio-header { - marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28", + marvell,pins = "mpp17", "mpp29", "mpp28", "mpp35", "mpp34", "mpp40"; marvell,function = "gpio"; }; + pmx_gpio_header_gpo: pxm-gpio-header-gpo { + marvell,pins = "mpp7"; + marvell,function = "gpo"; + }; + pmx_gpio_init: pmx-init { marvell,pins = "mpp38"; marvell,function = "gpio"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5840f5c75c3b..4f2f2eea0755 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -1104,7 +1104,7 @@ be1_out_tcon0: endpoint@0 { reg = <0>; - remote-endpoint = <&tcon1_in_be0>; + remote-endpoint = <&tcon0_in_be1>; }; be1_out_tcon1: endpoint@1 { diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 59655e42e4b0..bd0cd3204273 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -1354,7 +1354,7 @@ be1_out_tcon0: endpoint@0 { reg = <0>; - remote-endpoint = <&tcon1_in_be0>; + remote-endpoint = <&tcon0_in_be1>; }; be1_out_tcon1: endpoint@1 { diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 5caaf971fb50..df433abfcb02 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -10,6 +10,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_FREQ=y @@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_AHCI_SUNXI=y diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 4425189bb24c..41e2feb0cf4f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -25,16 +25,58 @@ #include "bpf_jit_32.h" -int bpf_jit_enable __read_mostly; +/* + * eBPF prog stack layout: + * + * high + * original ARM_SP => +-----+ + * | | callee saved registers + * +-----+ <= (BPF_FP + SCRATCH_SIZE) + * | ... | eBPF JIT scratch space + * eBPF fp register => +-----+ + * (BPF_FP) | ... | eBPF prog stack + * +-----+ + * |RSVD | JIT scratchpad + * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | | + * | ... | Function call stack + * | | + * +-----+ + * low + * + * The callee saved registers depends on whether frame pointers are enabled. 
+ * With frame pointers (to be compliant with the ABI): + * + * high + * original ARM_SP => +------------------+ \ + * | pc | | + * current ARM_FP => +------------------+ } callee saved registers + * |r4-r8,r10,fp,ip,lr| | + * +------------------+ / + * low + * + * Without frame pointers: + * + * high + * original ARM_SP => +------------------+ + * | r4-r8,r10,fp,lr | callee saved registers + * current ARM_FP => +------------------+ + * low + * + * When popping registers off the stack at the end of a BPF function, we + * reference them via the current ARM_FP register. + */ +#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ + 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ + 1 << ARM_FP) +#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) +#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) #define STACK_OFFSET(k) (k) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ -/* Flags used for JIT optimization */ -#define SEEN_CALL (1 << 0) - #define FLAG_IMM_OVERFLOW (1 << 0) /* @@ -95,7 +137,6 @@ static const u8 bpf2a32[][2] = { * idx : index of current last JITed instruction. * prologue_bytes : bytes used in prologue. * epilogue_offset : offset of epilogue starting. - * seen : bit mask used for JIT optimization. * offsets : array of eBPF instruction offsets in * JITed code. * target : final JITed code. @@ -110,7 +151,6 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; - u32 seen; u32 flags; u32 *offsets; u32 *target; @@ -179,8 +219,13 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); } -/* Stack must be multiples of 16 Bytes */ -#define STACK_ALIGN(sz) (((sz) + 3) & ~3) +#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) +/* EABI requires the stack to be aligned to 64-bit boundaries */ +#define STACK_ALIGNMENT 8 +#else +/* Stack must be aligned to 32-bit boundaries */ +#define STACK_ALIGNMENT 4 +#endif /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, @@ -194,7 +239,7 @@ static void jit_fill_hole(void *area, unsigned int size) + SCRATCH_SIZE + \ + 4 /* extra for skb_copy_bits buffer */) -#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) +#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) /* Get the offset of eBPF REGISTERs stored on scratch space. */ #define STACK_VAR(off) (STACK_SIZE-off-4) @@ -285,16 +330,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) emit_mov_i_no8m(rd, val, ctx); } -static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) { - ctx->seen |= SEEN_CALL; -#if __LINUX_ARM_ARCH__ < 5 - emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); - if (elf_hwcap & HWCAP_THUMB) emit(ARM_BX(tgt_reg), ctx); else emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); +} + +static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +{ +#if __LINUX_ARM_ARCH__ < 5 + emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); + emit_bx_r(tgt_reg, ctx); #else emit(ARM_BLX_R(tgt_reg), ctx); #endif @@ -354,7 +402,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) } /* Call appropriate function */ - ctx->seen |= SEEN_CALL; emit_mov_i(ARM_IP, op == BPF_DIV ? 
(u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); @@ -620,8 +667,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); @@ -656,8 +701,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); _emit(ARM_COND_MI, ARM_B(0), ctx); @@ -692,8 +735,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, /* Do LSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); @@ -828,8 +869,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, /* Do Multiplication */ emit(ARM_MUL(ARM_IP, rd, rn), ctx); emit(ARM_MUL(ARM_LR, rm, rt), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); @@ -872,33 +911,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk, - const s32 off, struct jit_ctx *ctx, const u8 sz){ +static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, + s32 off, struct jit_ctx *ctx, const u8 sz){ const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst; + const u8 *rd = dstk ? 
tmp : dst; u8 rm = src; + s32 off_max; - if (off) { + if (sz == BPF_H) + off_max = 0xff; + else + off_max = 0xfff; + + if (off < 0 || off > off_max) { emit_a32_mov_i(tmp[0], off, false, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; + off = 0; + } else if (rd[1] == rm) { + emit(ARM_MOV_R(tmp[0], rm), ctx); + rm = tmp[0]; } switch (sz) { - case BPF_W: - /* Load a Word */ - emit(ARM_LDR_I(rd, rm, 0), ctx); + case BPF_B: + /* Load a Byte */ + emit(ARM_LDRB_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); break; case BPF_H: /* Load a HalfWord */ - emit(ARM_LDRH_I(rd, rm, 0), ctx); + emit(ARM_LDRH_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); break; - case BPF_B: - /* Load a Byte */ - emit(ARM_LDRB_I(rd, rm, 0), ctx); + case BPF_W: + /* Load a Word */ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); + break; + case BPF_DW: + /* Load a Double Word */ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } if (dstk) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); + if (dstk && sz == BPF_DW) + emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); } /* Arithmatic Operation */ @@ -906,7 +965,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, const u8 rn, struct jit_ctx *ctx, u8 op) { switch (op) { case BPF_JSET: - ctx->seen |= SEEN_CALL; emit(ARM_AND_R(ARM_IP, rt, rn), ctx); emit(ARM_AND_R(ARM_LR, rd, rm), ctx); emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); @@ -945,7 +1003,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const u8 *tcc = bpf2a32[TCALL_CNT]; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) +#define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; /* if (index >= array->map.max_entries) @@ -956,7 +1014,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_a32_mov_i(tmp[1], off, false, ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); - /* index (64 bit) */ + /* index is 32-bit for arrays */ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); /* index >= array->map.max_entries */ emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); @@ -997,7 +1055,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_a32_mov_i(tmp2[1], off, false, ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); - emit(ARM_BX(tmp[1]), ctx); + emit_bx_r(tmp[1], ctx); /* out: */ if (out_offset == -1) @@ -1070,54 +1128,22 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r2 = bpf2a32[BPF_REG_1][1]; const u8 r3 = bpf2a32[BPF_REG_1][0]; const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 r5 = bpf2a32[BPF_REG_6][0]; - const u8 r6 = bpf2a32[TMP_REG_1][1]; - const u8 r7 = bpf2a32[TMP_REG_1][0]; - const u8 r8 = bpf2a32[TMP_REG_2][1]; - const u8 r10 = bpf2a32[TMP_REG_2][0]; const u8 fplo = bpf2a32[BPF_REG_FP][1]; const u8 fphi = bpf2a32[BPF_REG_FP][0]; - const u8 sp = ARM_SP; const u8 *tcc = bpf2a32[TCALL_CNT]; - u16 reg_set = 0; - - /* - * eBPF prog stack layout - * - * high - * original ARM_SP => +-----+ eBPF prologue - * |FP/LR| - * current ARM_FP => +-----+ - * | ... | callee saved registers - * eBPF fp register => +-----+ <= (BPF_FP) - * | ... | eBPF JIT scratch space - * | | eBPF prog stack - * +-----+ - * |RSVD | JIT scratchpad - * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) - * | | - * | ... 
| Function call stack - * | | - * +-----+ - * low - */ - /* Save callee saved registers. */ - reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10); #ifdef CONFIG_FRAME_POINTER - reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC); - emit(ARM_MOV_R(ARM_IP, sp), ctx); + u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; + emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); emit(ARM_PUSH(reg_set), ctx); emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); #else - /* Check if call instruction exists in BPF body */ - if (ctx->seen & SEEN_CALL) - reg_set |= (1<<ARM_LR); - emit(ARM_PUSH(reg_set), ctx); + emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); + emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); #endif /* Save frame pointer for later */ - emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx); + emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); ctx->stack_size = imm8m(STACK_SIZE); @@ -1140,33 +1166,19 @@ static void build_prologue(struct jit_ctx *ctx) /* end of prologue */ } +/* restore callee saved registers. */ static void build_epilogue(struct jit_ctx *ctx) { - const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 r5 = bpf2a32[BPF_REG_6][0]; - const u8 r6 = bpf2a32[TMP_REG_1][1]; - const u8 r7 = bpf2a32[TMP_REG_1][0]; - const u8 r8 = bpf2a32[TMP_REG_2][1]; - const u8 r10 = bpf2a32[TMP_REG_2][0]; - u16 reg_set = 0; - - /* unwind function call stack */ - emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); - - /* restore callee saved registers. */ - reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10); #ifdef CONFIG_FRAME_POINTER - /* the first instruction of the prologue was: mov ip, sp */ - reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC); + /* When using frame pointers, some additional registers need to + * be loaded. */ + u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; + emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); emit(ARM_LDM(ARM_SP, reg_set), ctx); #else - if (ctx->seen & SEEN_CALL) - reg_set |= (1<<ARM_PC); /* Restore callee saved registers. */ - emit(ARM_POP(reg_set), ctx); - /* Return back to the callee function */ - if (!(ctx->seen & SEEN_CALL)) - emit(ARM_BX(ARM_LR), ctx); + emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); + emit(ARM_POP(CALLEE_POP_MASK), ctx); #endif } @@ -1394,8 +1406,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_rev32(rt, rt, ctx); goto emit_bswap_uxt; case 64: - /* Because of the usage of ARM_LR */ - ctx->seen |= SEEN_CALL; emit_rev32(ARM_LR, rt, ctx); emit_rev32(rt, rd, ctx); emit(ARM_MOV_R(rd, ARM_LR), ctx); @@ -1448,22 +1458,7 @@ exit: rn = sstk ? 
tmp2[1] : src_lo; if (sstk) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - switch (BPF_SIZE(code)) { - case BPF_W: - /* Load a Word */ - case BPF_H: - /* Load a Half-Word */ - case BPF_B: - /* Load a Byte */ - emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); - break; - case BPF_DW: - /* Load a double word */ - emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W); - emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W); - break; - } + emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); break; /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ case BPF_LD | BPF_ABS | BPF_W: diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c9bdc7ab50b..9db19314c60c 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -66,6 +66,7 @@ <&cpu1>, <&cpu2>, <&cpu3>; + interrupt-parent = <&intc>; }; psci { diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index e3b64d03fbd8..9c7724e82aff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -63,8 +63,10 @@ cpm_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; - clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>; - clock-names = "pp_clk", "gop_clk", "mg_clk"; + clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, + <&cpm_clk 1 5>, <&cpm_clk 1 18>; + clock-names = "pp_clk", "gop_clk", + "mg_clk","axi_clk"; marvell,system-controller = <&cpm_syscon0>; status = "disabled"; dma-coherent; @@ -155,7 +157,8 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x12a200 0x10>; - clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>; + clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>, + <&cpm_clk 1 6>, <&cpm_clk 1 18>; status = "disabled"; }; @@ -338,8 +341,8 @@ compatible = "marvell,armada-cp110-sdhci"; reg = <0x780000 0x300>; interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>; - clock-names = "core"; - clocks = <&cpm_clk 1 4>; + clock-names = "core","axi"; + clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>; dma-coherent; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 0d51096c69f8..87ac68b2cf37 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -63,8 +63,10 @@ cps_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; - clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>; - clock-names = "pp_clk", "gop_clk", "mg_clk"; + clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, + <&cps_clk 1 5>, <&cps_clk 1 18>; + clock-names = "pp_clk", "gop_clk", + "mg_clk", "axi_clk"; marvell,system-controller = <&cps_syscon0>; status = "disabled"; dma-coherent; @@ -155,7 +157,8 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x12a200 0x10>; - clocks = <&cps_clk 1 9>, <&cps_clk 1 5>; + clocks = <&cps_clk 1 9>, <&cps_clk 1 5>, + <&cps_clk 1 6>, <&cps_clk 1 18>; status = "disabled"; }; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 304203fa9e33..e60494f1eef9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -45,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - 
kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -54,7 +54,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index acaa935ed977..0775d5ab8ee9 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -31,8 +31,6 @@ #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) @@ -162,7 +160,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) -#define PROLOGUE_OFFSET 8 +/* Tail call offset to jump into */ +#define PROLOGUE_OFFSET 7 static int build_prologue(struct jit_ctx *ctx) { @@ -214,19 +213,19 @@ static int build_prologue(struct jit_ctx *ctx) /* Initialize tail_call_cnt */ emit(A64_MOVZ(1, tcc, 0, 0), ctx); - /* 4 byte extra for skb_copy_bits buffer */ - ctx->stack_size = prog->aux->stack_depth + 4; - ctx->stack_size = STACK_ALIGN(ctx->stack_size); - - /* Set up function call stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); - cur_offset = ctx->idx - idx0; if (cur_offset != PROLOGUE_OFFSET) { pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", cur_offset, PROLOGUE_OFFSET); return -1; } + + /* 4 byte extra for skb_copy_bits buffer */ + ctx->stack_size = prog->aux->stack_depth + 4; + ctx->stack_size = STACK_ALIGN(ctx->stack_size); + + /* Set up function call stack */ + emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); return 0; } @@ -274,11 +273,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(A64_LDR64(prg, tmp, prg), ctx); emit(A64_CBZ(1, prg, jmp_offset), ctx); - /* goto *(prog->bpf_func + prologue_size); */ + /* goto *(prog->bpf_func + prologue_offset); */ off = offsetof(struct bpf_prog, bpf_func); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR64(tmp, prg, tmp), ctx); emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); emit(A64_BR(tmp), ctx); /* out: */ diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 28e02c99be6d..762eeb0fcc1d 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v) \ ATOMIC_OPS(add, +) ATOMIC_OPS(sub, -) -#define atomic_add_return(i,v) \ +#ifdef __OPTIMIZE__ +#define __ia64_atomic_const(i) __builtin_constant_p(i) ? \ + ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \ + (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0 + +#define atomic_add_return(i, v) \ ({ \ - int __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ - ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic_add(__ia64_aar_i, v); \ + int __i = (i); \ + static const int __ia64_atomic_p = __ia64_atomic_const(i); \ + __ia64_atomic_p ? 
ia64_fetch_and_add(__i, &(v)->counter) : \ + ia64_atomic_add(__i, v); \ }) -#define atomic_sub_return(i,v) \ +#define atomic_sub_return(i, v) \ ({ \ - int __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ - ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic_sub(__ia64_asr_i, v); \ + int __i = (i); \ + static const int __ia64_atomic_p = __ia64_atomic_const(i); \ + __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) : \ + ia64_atomic_sub(__i, v); \ }) +#else +#define atomic_add_return(i, v) ia64_atomic_add(i, v) +#define atomic_sub_return(i, v) ia64_atomic_sub(i, v) +#endif #define atomic_fetch_add(i,v) \ ({ \ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 350a990fc719..8e0b3702f1c0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -259,6 +259,7 @@ config BCM47XX select LEDS_GPIO_REGISTER select BCM47XX_NVRAM select BCM47XX_SPROM + select BCM47XX_SSB if !BCM47XX_BCMA help Support for BCM47XX based boards @@ -389,6 +390,7 @@ config LANTIQ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_MIPS16 select SYS_SUPPORTS_MULTITHREADING + select SYS_SUPPORTS_VPE_LOADER select SYS_HAS_EARLY_PRINTK select GPIOLIB select SWAP_IO_SPACE @@ -516,6 +518,7 @@ config MIPS_MALTA select SYS_SUPPORTS_MIPS16 select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_SMARTMIPS + select SYS_SUPPORTS_VPE_LOADER select SYS_SUPPORTS_ZBOOT select SYS_SUPPORTS_RELOCATABLE select USE_OF @@ -2281,9 +2284,16 @@ config MIPSR2_TO_R6_EMULATOR The only reason this is a build-time option is to save ~14K from the final kernel image. +config SYS_SUPPORTS_VPE_LOADER + bool + depends on SYS_SUPPORTS_MULTITHREADING + help + Indicates that the platform supports the VPE loader, and provides + physical_memsize. + config MIPS_VPE_LOADER bool "VPE loader support." - depends on SYS_SUPPORTS_MULTITHREADING && MODULES + depends on SYS_SUPPORTS_VPE_LOADER && MODULES select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select MIPS_MT diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 464af5e025d6..0749c3724543 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -124,30 +124,36 @@ config SCACHE_DEBUGFS If unsure, say N. -menuconfig MIPS_CPS_NS16550 +menuconfig MIPS_CPS_NS16550_BOOL bool "CPS SMP NS16550 UART output" depends on MIPS_CPS help Output debug information via an ns16550 compatible UART if exceptions occur early in the boot process of a secondary core. -if MIPS_CPS_NS16550 +if MIPS_CPS_NS16550_BOOL + +config MIPS_CPS_NS16550 + def_bool MIPS_CPS_NS16550_BASE != 0 config MIPS_CPS_NS16550_BASE hex "UART Base Address" default 0x1b0003f8 if MIPS_MALTA + default 0 help The base address of the ns16550 compatible UART on which to output debug information from the early stages of core startup. + This is only used if non-zero. + config MIPS_CPS_NS16550_SHIFT int "UART Register Shift" - default 0 if MIPS_MALTA + default 0 help The number of bits to shift ns16550 register indices by in order to form their addresses. That is, log base 2 of the span between adjacent ns16550 registers in the system. 
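The register shift described in this help text simply scales an ns16550 register index into a byte offset; a one-function sketch of the computation it describes (the function name is illustrative, not from the patch):

	/* Byte offset of ns16550 register 'idx' when adjacent registers
	 * are spaced 2^shift bytes apart, per the help text above. */
	static inline unsigned long ns16550_reg_offset(unsigned int idx,
						       unsigned int shift)
	{
		return (unsigned long)idx << shift;
	}
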
-endif # MIPS_CPS_NS16550 +endif # MIPS_CPS_NS16550_BOOL endmenu diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 4674f1efbe7a..e1675c25d5d4 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -575,7 +575,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE; + uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; uart_port.regshift = 2; uart_port.line = 0; diff --git a/arch/mips/ath25/devices.c b/arch/mips/ath25/devices.c index e1156347da53..301a9028273c 100644 --- a/arch/mips/ath25/devices.c +++ b/arch/mips/ath25/devices.c @@ -73,6 +73,7 @@ const char *get_system_type(void) void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk) { +#ifdef CONFIG_SERIAL_8250_CONSOLE struct uart_port s; memset(&s, 0, sizeof(s)); @@ -85,6 +86,7 @@ void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk) s.uartclk = uartclk; early_serial_setup(&s); +#endif /* CONFIG_SERIAL_8250_CONSOLE */ } int __init ath25_add_wmac(int nr, u32 base, int irq) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index dd5567b1e305..8f5bd04f320a 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -292,7 +292,6 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, *this_cpu_ptr(&cm_core_lock_flags)); } else { WARN_ON(cluster != 0); - WARN_ON(vp != 0); WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL); /* diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 78c2affeabf8..e84e12655fa8 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o # libgcc-style stuff needed in the kernel -obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o +obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \ + ucmpdi2.o diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h index 28002ed90c2c..199a7f96282f 100644 --- a/arch/mips/lib/libgcc.h +++ b/arch/mips/lib/libgcc.h @@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__))); struct DWstruct { int high, low; }; + +struct TWstruct { + long long high, low; +}; #elif defined(__LITTLE_ENDIAN) struct DWstruct { int low, high; }; + +struct TWstruct { + long long low, high; +}; #else #error I feel sick. #endif @@ -23,4 +31,13 @@ typedef union { long long ll; } DWunion; +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) +typedef int ti_type __attribute__((mode(TI))); + +typedef union { + struct TWstruct s; + ti_type ti; +} TWunion; +#endif + #endif /* __ASM_LIBGCC_H */ diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c new file mode 100644 index 000000000000..111ad475aa0c --- /dev/null +++ b/arch/mips/lib/multi3.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> + +#include "libgcc.h" + +/* + * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that + * specific case only we'll implement it here. 
+ * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 + */ +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) + +/* multiply 64-bit values, low 64-bits returned */ +static inline long long notrace dmulu(long long a, long long b) +{ + long long res; + + asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */ +static inline long long notrace dmuhu(long long a, long long b) +{ + long long res; + + asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* multiply 128-bit values, low 128-bits returned */ +ti_type notrace __multi3(ti_type a, ti_type b) +{ + TWunion res, aa, bb; + + aa.ti = a; + bb.ti = b; + + /* + * a * b = (a.lo * b.lo) + * + 2^64 * (a.hi * b.lo + a.lo * b.hi) + * [+ 2^128 * (a.hi * b.hi)] + */ + res.s.low = dmulu(aa.s.low, bb.s.low); + res.s.high = dmuhu(aa.s.low, bb.s.low); + res.s.high += dmulu(aa.s.high, bb.s.low); + res.s.high += dmulu(aa.s.low, bb.s.high); + + return res.ti; +} +EXPORT_SYMBOL(__multi3); + +#endif /* 64BIT && CPU_MIPSR6 && GCC7 */ diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index cdb5a191b9d5..9bb6baa45da3 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -40,7 +40,7 @@ #include "uasm.c" -static const struct insn const insn_table_MM[insn_invalid] = { +static const struct insn insn_table_MM[insn_invalid] = { [insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD}, [insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, [insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD}, diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 44b925005dd3..4d8cb9bb8365 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1207,8 +1207,6 @@ jmp_cmp: return 0; } -int bpf_jit_enable __read_mostly; - void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 97069a1b6f43..4e347030ed2c 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -177,8 +177,6 @@ static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) (ctx->idx * 4) - 4; } -int bpf_jit_enable __read_mostly; - enum which_ebpf_reg { src_reg, src_reg_no_fp, diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index d4469b20d176..4f46a4509f79 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -109,9 +109,9 @@ static int rt_timer_probe(struct platform_device *pdev) } rt->irq = platform_get_irq(pdev, 0); - if (!rt->irq) { + if (rt->irq < 0) { dev_err(&pdev->dev, "failed to load irq\n"); - return -ENOENT; + return rt->irq; } rt->membase = devm_ioremap_resource(&pdev->dev, res); diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile index efdecdb6e3ea..8186afca2234 100644 --- a/arch/mips/rb532/Makefile +++ b/arch/mips/rb532/Makefile @@ -2,4 +2,6 @@ # Makefile for the RB532 board specific parts of the kernel # -obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o +obj-$(CONFIG_SERIAL_8250_CONSOLE) += serial.o + +obj-y += irq.o time.o setup.o prom.o gpio.o devices.o diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 32ea3e6731d6..354d258396ff 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -310,6 +310,8 @@ static int __init plat_setup_devices(void) return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); } +#ifdef CONFIG_NET + static int __init 
setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); @@ -322,4 +324,6 @@ static int __init setup_kmac(char *s) __setup("kmac=", setup_kmac); +#endif /* CONFIG_NET */ + arch_initcall(plat_setup_devices); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..2ed525a44734 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index f0461618bf7b..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -353,6 +353,7 @@ #define PROC_TABLE_GTSE 0x01 #ifndef __ASSEMBLY__ +#include <linux/types.h> /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..637b7263cb86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info { __u32 ap_encodings[8]; }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. + */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d213542a48b..8fd3a70047f1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. 
*/ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif - - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 491be4179ddd..e67413f4a8f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -38,6 +38,7 @@ #include <linux/memory.h> #include <linux/nmi.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -901,4 +902,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1915e86cef6f..0a7c88786ec0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,6 +39,10 @@ #include <asm/iommu.h> #include <asm/switch_to.h> #include <asm/xive.h> +#ifdef CONFIG_PPC_PSERIES +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#endif #include "timing.h" #include "irq.h" @@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif + case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; @@ -1759,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * These functions check whether the underlying hardware is safe + * against attacks based on observing the effects of speculatively + * executed instructions, and whether it supplies instructions for + * use in workarounds. The information comes from firmware, either + * via the device tree on powernv platforms or from an hcall on + * pseries platforms. 
+ */ +#ifdef CONFIG_PPC_PSERIES +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct h_cpu_char_result c; + unsigned long rc; + + if (!machine_is(pseries)) + return -ENOTTY; + + rc = plpar_get_cpu_characteristics(&c); + if (rc == H_SUCCESS) { + cp->character = c.character; + cp->behaviour = c.behaviour; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | + KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + } + return 0; +} +#else +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + return -ENOTTY; +} +#endif + +static inline bool have_fw_feat(struct device_node *fw_features, + const char *state, const char *name) +{ + struct device_node *np; + bool r = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + r = of_property_read_bool(np, state); + of_node_put(np); + } + return r; +} + +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct device_node *np, *fw_features; + int r; + + memset(cp, 0, sizeof(*cp)); + r = pseries_get_cpu_char(cp); + if (r != -ENOTTY) + return r; + + np = of_find_node_by_name(NULL, "ibm,opal"); + if (np) { + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + if (!fw_features) + return 0; + if (have_fw_feat(fw_features, "enabled", + "inst-spec-barrier-ori31,31,0")) + cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31; + if (have_fw_feat(fw_features, "enabled", + "fw-bcctrl-serialized")) + cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-ori30,30,0")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-trig2")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2; + if (have_fw_feat(fw_features, "enabled", + "fw-l1d-thread-split")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV; + if (have_fw_feat(fw_features, "enabled", + "fw-count-cache-disabled")) + cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + + if (have_fw_feat(fw_features, "enabled", + "speculation-policy-favor-security")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY; + if (!have_fw_feat(fw_features, "disabled", + "needs-l1d-flush-msr-pr-0-to-1")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR; + if (!have_fw_feat(fw_features, "disabled", + "needs-spec-barrier-for-bound-checks")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + + of_node_put(fw_features); + } + + return 0; +} +#endif + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_GET_CPU_CHAR: { + struct kvm_ppc_cpu_char cpuchar; + + r = kvmppc_get_cpu_char(&cpuchar); + if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) + r = -EFAULT; + break; + } default: { 
struct kvm *kvm = filp->private_data; r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index f9941b3b5770..872d1f6dd11e 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -18,8 +18,6 @@ #include "bpf_jit32.h" -int bpf_jit_enable __read_mostly; - static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 6771c63b2bec..217a78e84865 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -21,8 +21,6 @@ #include "bpf_jit64.h" -int bpf_jit_enable __read_mostly; - static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { memset32(area, BREAKPOINT_INSTRUCTION, size/4); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index cab24f549e7c..0ddc7ac6c5f1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); - DUMP(p, emergency_sp, "p"); + DUMP(p, emergency_sp, "px"); #ifdef CONFIG_PPC_BOOK3S_64 - DUMP(p, nmi_emergency_sp, "p"); - DUMP(p, mc_emergency_sp, "p"); + DUMP(p, nmi_emergency_sp, "px"); + DUMP(p, mc_emergency_sp, "px"); DUMP(p, in_nmi, "x"); DUMP(p, in_mce, "x"); DUMP(p, hmi_event_available, "x"); @@ -2375,17 +2375,21 @@ static void dump_one_paca(int cpu) DUMP(p, slb_cache_ptr, "x"); for (i = 0; i < SLB_CACHE_ENTRIES; i++) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); + + DUMP(p, rfi_flush_fallback_area, "px"); + DUMP(p, l1d_flush_congruence, "llx"); + DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E - DUMP(p, pgd, "p"); - DUMP(p, kernel_pgd, "p"); - DUMP(p, tcd_ptr, "p"); - DUMP(p, mc_kstack, "p"); - DUMP(p, crit_kstack, "p"); - DUMP(p, dbg_kstack, "p"); + DUMP(p, pgd, "px"); + DUMP(p, kernel_pgd, "px"); + DUMP(p, tcd_ptr, "px"); + DUMP(p, mc_kstack, "px"); + DUMP(p, crit_kstack, "px"); + DUMP(p, dbg_kstack, "px"); #endif - DUMP(p, __current, "p"); + DUMP(p, __current, "px"); DUMP(p, kstack, "lx"); printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1)); DUMP(p, stab_rr, "lx"); @@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu) #endif #ifdef CONFIG_PPC_POWERNV - DUMP(p, core_idle_state_ptr, "p"); + DUMP(p, core_idle_state_ptr, "px"); DUMP(p, thread_idle_state, "x"); DUMP(p, thread_mask, "x"); DUMP(p, subcore_sibling_mask, "x"); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e14f381757f6..c1b0a9ac1dc8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -207,7 +207,8 @@ struct kvm_s390_sie_block { __u16 ipa; /* 0x0056 */ __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ - __u8 reserved60; /* 0x0060 */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ #define ECB_GS 0x40 #define ECB_TE 0x10 #define ECB_SRSI 0x04 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 38535a57fef8..4cdaa55fabfe 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -224,6 +224,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) +#define KVM_SYNC_BPBC (1UL << 10) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -247,7 +248,9 @@ struct 
kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding1[52]; /* riccb needs to be 64byte aligned */ + __u8 bpbc : 1; /* bp mode */ + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ union { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2c93cbbcd15e..2598cf243b86 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_GS: r = test_facility(133); break; + case KVM_CAP_S390_BPB: + r = test_facility(82); + break; default: r = 0; } @@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; if (test_kvm_facility(vcpu->kvm, 133)) vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With @@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) @@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; vcpu->arch.gs_enabled = 1; } + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && + test_kvm_facility(vcpu->kvm, 82)) { + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; + vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; + } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ @@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->s.regs.pft = vcpu->arch.pfault_token; kvm_run->s.regs.pfs = vcpu->arch.pfault_select; kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; + kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); /* Save guest register state */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5d6ae0326d9e..751348348477 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy(scb_o->gcr, scb_s->gcr, 128); scb_o->pp = scb_s->pp; + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) { + scb_o->fpf &= ~FPF_BPBC; + scb_o->fpf |= scb_s->fpf & FPF_BPBC; + } + /* interrupt intercept */ switch (scb_s->icptcode) { case ICPT_PROGI: @@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecb3 = 0; scb_s->ecd = 0; scb_s->fac = 0; + scb_s->fpf = 0; rc = prepare_cpuflags(vcpu, vsie_page); if (rc) @@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= scb_o->ecb & ECB_TE; } + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) + scb_s->fpf |= scb_o->fpf & FPF_BPBC; /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { scb_s->eca |= scb_o->eca & ECA_VX; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 1dfadbd126f3..e50188773ff3 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -28,8 +28,6 @@ #include <asm/set_memory.h> #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - struct bpf_jit { u32 seen; /* Flags to remember seen eBPF instructions */ u32 seen_reg[16]; /* Array to remember which registers are used */ diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 818d3aa5172e..d257186c27d1 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o -obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o +obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index 09e318eb34ee..3bd8ca95e521 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -11,8 +11,6 @@ #include "bpf_jit_32.h" -int bpf_jit_enable __read_mostly; - static inline bool is_simm13(unsigned int value) { return value + 0x1000 < 0x2000; diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 635fdefd4ae2..50a24d7bd4c5 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -12,8 +12,6 @@ #include "bpf_jit_64.h" -int bpf_jit_enable __read_mostly; - static inline bool is_simm13(unsigned int value) { return value + 0x1000 < 0x2000; diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a54f23a..60c4c342316c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl %ebx, 
PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl %esi popl %edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4f8e1d35a97c..ff6f8022612c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -491,6 +491,17 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq %r15 popq %r14 @@ -1253,7 +1264,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 #endif #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 005908ee9333..a2efb490f743 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a9e57f08bfa6..98722773391d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); extern void apic_intr_mode_init(void); extern void setup_local_APIC(void); extern void init_apic_mappings(void); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447862f4..25b9375c1484 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -206,11 +206,11 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) 
/* Intel Processor Inventory Number */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -245,6 +245,7 @@ #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c9459a4c3c68..22c5f3e6f820 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); -void __init sme_encrypt_kernel(void); +void __init sme_encrypt_kernel(struct boot_params *bp); void __init sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); @@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_encrypt_kernel(void) { } +static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } static inline void __init sme_enable(struct boot_params *bp) { } static inline bool sme_active(void) { return false; } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 402a11c803c3..4ad41087ce0e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,7 +11,7 @@ * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; jmp' loop to capture speculative execution. + * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to @@ -38,11 +38,13 @@ call 772f; \ 773: /* speculation trap */ \ pause; \ + lfence; \ jmp 773b; \ 772: \ call 774f; \ 775: /* speculation trap */ \ pause; \ + lfence; \ jmp 775b; \ 774: \ dec reg; \ @@ -73,6 +75,7 @@ call .Ldo_rop_\@ .Lspec_trap_\@: pause + lfence jmp .Lspec_trap_\@ .Ldo_rop_\@: mov \reg, (%_ASM_SP) @@ -165,6 +168,7 @@ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ + " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: addl $4, %%esp;\n" \ @@ -190,6 +194,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + /* * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. 
Both @@ -199,16 +206,17 @@ enum spectre_v2_mitigation { static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops = RSB_CLEAR_LOOPS / 2; + unsigned long loops; asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE("jmp 910f", __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), X86_FEATURE_RETPOLINE) "910:" - : "=&r" (loops), ASM_CALL_CONSTRAINT - : "r" (loops) : "memory" ); + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 31051f35cbb7..3de69330e6c5 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif +dotraplinkage void do_mce(struct pt_regs *, long); static inline int get_si_code(unsigned long condition) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 81bb565f4497..7e2baf7304ae 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,10 +29,13 @@ KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y -OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y +ifdef CONFIG_FRAME_POINTER +OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y +endif + # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, # boot, dumpstack/stacktrace, etc are either non-interesting or can lead to diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 880441f24146..25ddf02598d2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; } +/* + * An initial setup of the virtual wire mode. + */ +void __init init_bsp_APIC(void) +{ + unsigned int value; + + /* + * Don't do the setup now if we have an SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) + return; + + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + + /* + * Enable APIC. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV, value); + + /* + * Set up the virtual wire mode.
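+ *
+ * Concretely, as programmed just below: LINT0 is routed as ExtINT
+ * so the legacy 8259A can deliver interrupts through the local
+ * APIC, and LINT1 is set up as NMI (level-triggered on a discrete
+ * 82489DX, and masked when apic_extnmi disallows external NMIs).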
+ */ + apic_write(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + if (apic_extnmi == APIC_EXTNMI_NONE) + value |= APIC_LVT_MASKED; + apic_write(APIC_LVT1, value); +} + /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f8b03bb8e725..3cc471beb50b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, err = assign_irq_vector_policy(irqd, info); trace_vector_setup(virq + i, false, err); - if (err) + if (err) { + irqd->chip_data = NULL; + free_apic_chip_data(apicd); goto error; + } } return 0; error: - x86_vector_free_irqs(domain, virq, i + 1); + x86_vector_free_irqs(domain, virq, i); return err; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e4dc26185aa7..390b3dc3d438 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,6 +23,7 @@ #include <asm/alternative.h> #include <asm/pgtable.h> #include <asm/set_memory.h> +#include <asm/intel-family.h> static void __init spectre_v2_select_mitigation(void); @@ -155,6 +156,23 @@ disable: return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -213,6 +231,24 @@ retpoline_auto: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); + + /* + * If neither SMEP nor KPTI are available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this, fill + * the entire RSB, even when using IBRS. + * + * Skylake era CPUs have a separate issue with *underflow* of the + * RSB, when they will predict 'ret' targets from the generic BTB. + * The proper mitigation for this is IBRS. If IBRS is not supported + * or deactivated in favour of retpolines the RSB fill on context + * switch is required.
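+ *
+ * (In short: the fill is forced either when both PTI and SMEP are
+ * missing, or on the Skylake/Kaby Lake models listed in
+ * is_skylake_era() above, as the condition below implements.)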
+ */ + if ((!boot_cpu_has(X86_FEATURE_PTI) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } } #undef pr_fmt diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 88dcf8479013..99442370de40 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); - kfree(d->ctrl_val); - kfree(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); list_del(&d->list); if (is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); @@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cancel_delayed_work(&d->cqm_limbo); } + kfree(d->ctrl_val); + kfree(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); kfree(d); return; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1d616d08eee..868e412b4f0c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1785,6 +1785,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 05459ad3db46..d0e69769abfd 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -21,7 +21,6 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 7cb8ba08beb9..ef61f540cf0a 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -8,6 +8,7 @@ #include <asm/ftrace.h> #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/unwind_hints.h> .code64 .section .entry.text, "ax" @@ -20,7 +21,6 @@ EXPORT_SYMBOL(__fentry__) EXPORT_SYMBOL(mcount) #endif -/* All cases save the original rbp (8 bytes) */ #ifdef CONFIG_FRAME_POINTER # ifdef CC_USING_FENTRY /* Save parent and function stack frames (rip and rbp) */ @@ -31,7 +31,7 @@ EXPORT_SYMBOL(mcount) # endif #else /* No need to save a stack frame */ -# define MCOUNT_FRAME_SIZE 8 +# define MCOUNT_FRAME_SIZE 0 #endif /* CONFIG_FRAME_POINTER */ /* Size of stack used to save mcount regs in save_mcount_regs */ @@ -64,10 +64,10 @@ EXPORT_SYMBOL(mcount) */ .macro save_mcount_regs added=0 - /* Always save the original rbp */ +#ifdef CONFIG_FRAME_POINTER + /* Save the original rbp */ pushq %rbp -#ifdef CONFIG_FRAME_POINTER /* * Stack traces will stop at the ftrace trampoline if the frame pointer * is not set up properly. If fentry is used, we need to save a frame @@ -105,7 +105,11 @@ EXPORT_SYMBOL(mcount) * Save the original RBP. Even though the mcount ABI does not * require this, it helps out callers. 
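 *
 * (With CONFIG_FRAME_POINTER the rbp saved on the stack above is
 * used; without it nothing was pushed, so the live %rbp is copied
 * into pt_regs directly, as the #else branch below does.)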
*/ +#ifdef CONFIG_FRAME_POINTER movq MCOUNT_REG_SIZE-8(%rsp), %rdx +#else + movq %rbp, %rdx +#endif movq %rdx, RBP(%rsp) /* Copy the parent address into %rsi (second parameter) */ @@ -148,7 +152,7 @@ EXPORT_SYMBOL(mcount) ENTRY(function_hook) retq -END(function_hook) +ENDPROC(function_hook) ENTRY(ftrace_caller) /* save_mcount_regs fills in first two parameters */ @@ -184,7 +188,7 @@ GLOBAL(ftrace_graph_call) /* This is weak to keep gas from relaxing the jumps */ WEAK(ftrace_stub) retq -END(ftrace_caller) +ENDPROC(ftrace_caller) ENTRY(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ @@ -255,7 +259,7 @@ GLOBAL(ftrace_regs_caller_end) jmp ftrace_epilogue -END(ftrace_regs_caller) +ENDPROC(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -313,9 +317,10 @@ ENTRY(ftrace_graph_caller) restore_mcount_regs retq -END(ftrace_graph_caller) +ENDPROC(ftrace_graph_caller) -GLOBAL(return_to_handler) +ENTRY(return_to_handler) + UNWIND_HINT_EMPTY subq $24, %rsp /* Save the return values */ @@ -330,4 +335,5 @@ GLOBAL(return_to_handler) movq (%rsp), %rax addq $24, %rsp JMP_NOSPEC %rdi +END(return_to_handler) #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6a5d757b9cfd..7ba5d819ebe3 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr, p = fixup_pointer(&phys_base, physaddr); *p += load_delta - sme_get_me_mask(); - /* Encrypt the kernel (if SME is active) */ - sme_encrypt_kernel(); + /* Encrypt the kernel and related (if SME is active) */ + sme_encrypt_kernel(bp); /* * Return the SME encryption mask (if SME is active) to be used as a diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d985cef3984f..56d99be3706a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -56,7 +56,7 @@ struct idt_data { * Early traps running on the DEFAULT_STACK because the other interrupt * stacks work only after cpu_init(). */ -static const __initdata struct idt_data early_idts[] = { +static const __initconst struct idt_data early_idts[] = { INTG(X86_TRAP_DB, debug), SYSG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_32 @@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = { * the traps which use them are reinitialized with IST after cpu_init() has * set up TSS. */ -static const __initdata struct idt_data def_idts[] = { +static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DE, divide_error), INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_BR, bounds), @@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = { /* * The APIC and SMP idt entries */ -static const __initdata struct idt_data apic_idts[] = { +static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_SMP INTG(RESCHEDULE_VECTOR, reschedule_interrupt), INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), @@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = { * Early traps running on the DEFAULT_STACK because the other interrupt * stacks work only after cpu_init(). */ -static const __initdata struct idt_data early_pf_idts[] = { +static const __initconst struct idt_data early_pf_idts[] = { INTG(X86_TRAP_PF, page_fault), }; @@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = { * Override for the debug_idt. Same as the default, but with interrupt * stack set to DEFAULT_STACK (0). Required for NMI trap handling. 
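 *
 * (Like the other IDT tables in this file, this one is const, so it
 * is annotated __initconst below; __initdata on const data can
 * trigger "section type conflict" build errors.)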
*/ -static const __initdata struct idt_data dbg_idts[] = { +static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), INTG(X86_TRAP_BP, int3), }; @@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * The exceptions which use Interrupt stacks. They are setup after * cpu_init() when the TSS has been initialized. */ -static const __initdata struct idt_data ist_idts[] = { +static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), SISTG(X86_TRAP_BP, int3, DEBUG_STACK), diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 8da3e909e967..a539410c4ea9 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,6 +61,9 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index e941136e24d8..203d398802a3 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -40,6 +40,7 @@ #include <asm/debugreg.h> #include <asm/set_memory.h> #include <asm/sections.h> +#include <asm/nospec-branch.h> #include "common.h" @@ -203,7 +204,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) } /* Check whether insn is indirect jump */ -static int insn_is_indirect_jump(struct insn *insn) +static int __insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -237,6 +238,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } +static int insn_is_indirect_jump(struct insn *insn) +{ + int ret = __insn_is_indirect_jump(insn); + +#ifdef CONFIG_RETPOLINE + /* + * Jump to x86_indirect_thunk_* is treated as an indirect jump. + * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with + * older gcc may use indirect jump. So we add this check instead of + * replace indirect-jump check. + */ + if (!ret) + ret = insn_jump_into_range(insn, + (unsigned long)__indirect_thunk_start, + (unsigned long)__indirect_thunk_end - + (unsigned long)__indirect_thunk_start); +#endif + return ret; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 832a6acd730f..cb368c2a22ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -380,19 +380,24 @@ void stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + /* + * Use wbinvd on processors that support SME. This provides support + * for performing a successful kexec when going from SME inactive + * to SME active (or vice-versa). The cache must be cleared so that + * if there are entries with the same physical address, both with and + * without the encryption bit, they don't race each other when flushed + * and potentially end up with the wrong entry being committed to + * memory. + */ + if (boot_cpu_has(X86_FEATURE_SME)) + native_wbinvd(); for (;;) { /* - * Use wbinvd followed by hlt to stop the processor. This - * provides support for kexec on a processor that supports - * SME. 
With kexec, going from SME inactive to SME active - requires clearing cache entries so that addresses without - the encryption bit set don't corrupt the same physical - address that has the encryption bit set when caches are - flushed. To achieve this a wbinvd is performed followed by - a hlt. Even if the processor is not in the kexec/SME - scenario this only adds a wbinvd to a halting processor. + * Use native_halt() so that memory contents don't change + * (stack usage and variables) after possibly issuing the + * native_wbinvd() above. */ - asm volatile("wbinvd; hlt" : : : "memory"); + native_halt(); } } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 145810b0edf6..68d7ab81c62f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -364,16 +364,6 @@ static void __init reserve_initrd(void) !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ - /* - * If SME is active, this memory will be marked encrypted by the - * kernel when it is accessed (including relocation). However, the - * ramdisk image was loaded decrypted by the bootloader, so make - * sure that it is encrypted before accessing it. For SEV the - * ramdisk will already be encrypted, so only do this for SME. - */ - if (sme_active()) - sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); - initrd_start = 0; mapped_size = memblock_mem_size(max_pfn_mapped); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8ea117f8142e..e169e85db434 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; @@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void) } } + if (crystal_khz == 0) + return 0; /* * TSC frequency determined by CPUID is a "hardware reported" * frequency and is the most accurate one so far we have. This @@ -1315,6 +1316,12 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); + if (cpu_khz != tsc_khz) { + pr_info("Detected %lu.%03lu MHz TSC", + (unsigned long)tsc_khz / 1000, + (unsigned long)tsc_khz % 1000); + } + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ tsc_store_and_check_tsc_adjust(true); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index be86a865087a..1f9188f5357c 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -74,8 +74,50 @@ static struct orc_entry *orc_module_find(unsigned long ip) } #endif +#ifdef CONFIG_DYNAMIC_FTRACE +static struct orc_entry *orc_find(unsigned long ip); + +/* + * Ftrace dynamic trampolines do not have orc entries of their own. + * But they are copies of the ftrace entries that are static and + * defined in ftrace_*.S, which do have orc entries. + * + * If the unwinder comes across an ftrace trampoline, then find the + * ftrace function that was used to create it, and use that ftrace + * function's orc entry, as the placement of the return code in + * the stack will be identical.
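+ *
+ * For example, a trampoline created for an ftrace_ops without
+ * FTRACE_OPS_FL_SAVE_REGS resolves (below) to the orc entry covering
+ * ftrace_call in ftrace_64.S, while one with SAVE_REGS resolves to
+ * the entry covering ftrace_regs_call.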
+ */ +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + struct ftrace_ops *ops; + unsigned long caller; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + caller = (unsigned long)ftrace_regs_call; + else + caller = (unsigned long)ftrace_call; + + /* Prevent unlikely recursion */ + if (ip == caller) + return NULL; + + return orc_find(caller); +} +#else +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + return NULL; +} +#endif + static struct orc_entry *orc_find(unsigned long ip) { + static struct orc_entry *orc; + if (!orc_init) return NULL; @@ -111,7 +153,11 @@ static struct orc_entry *orc_find(unsigned long ip) __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); /* Module lookup: */ - return orc_module_find(ip); + orc = orc_module_find(ip); + if (orc) + return orc; + + return orc_ftrace_find(ip); } static void orc_sort_swap(void *_a, void *_b, int size) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1e413a9326aa..9b138a06c1a4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -124,6 +124,12 @@ SECTIONS ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif +#ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; + *(.text.__x86.indirect_thunk) + __indirect_thunk_end = .; +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1cec2c62a0b0..c53298dfbf50 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7496,13 +7496,13 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in * 64-bit mode (though maybe in a 32-bit code segment). * CR4.PAE and EFER.LMA must be set. */ - if (!(sregs->cr4 & X86_CR4_PAE_BIT) + if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) return -EINVAL; } else { diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cb45c6cb465f..dfb2ba91b670 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,7 +9,7 @@ #include <asm/nospec-branch.h> .macro THUNK reg - .section .text.__x86.indirect_thunk.\reg + .section .text.__x86.indirect_thunk ENTRY(__x86_indirect_thunk_\reg) CFI_STARTPROC @@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) * than one per register with the correct names. So we do it * the simple and nasty way... */ -#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) GENERATE_THUNK(_ASM_AX) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 06fe3d51d385..b3e40773dce0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. 
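 *
 * (The si_signo check added below also prevents a non-SIGSEGV
 * signal, e.g. a SIGBUS whose si_code numerically collides with
 * SEGV_PKUERR, from having a pkey filled in.)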
*/ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) +static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, + u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; /* Fault not from Protection Keys: nothing to do */ - if (si_code != SEGV_PKUERR) + if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) return; /* * force_sig_info_fault() is called from a number of @@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, pkey); + fill_sig_info_pkey(si_signo, si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 47388f0c0e59..af6f2f9c6a26 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static __init void *early_alloc(size_t size, int nid) +static __init void *early_alloc(size_t size, int nid, bool panic) { - return memblock_virt_alloc_try_nid_nopanic(size, size, - __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); + if (panic) + return memblock_virt_alloc_try_nid(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); + else + return memblock_virt_alloc_try_nid_nopanic(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); } static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, @@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, if (boot_cpu_has(X86_FEATURE_PSE) && ((end - addr) == PMD_SIZE) && IS_ALIGNED(addr, PMD_SIZE)) { - p = early_alloc(PMD_SIZE, nid); + p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) return; else if (p) memblock_free(__pa(p), PMD_SIZE); } - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pmd_populate_kernel(&init_mm, pmd, p); } @@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, if (!pte_none(*pte)) continue; - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, if (boot_cpu_has(X86_FEATURE_GBPAGES) && ((end - addr) == PUD_SIZE) && IS_ALIGNED(addr, PUD_SIZE)) { - p = early_alloc(PUD_SIZE, nid); + p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; else if (p) memblock_free(__pa(p), PUD_SIZE); } - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pud_populate(&init_mm, pud, p); } @@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, unsigned long next; if (p4d_none(*p4d)) { - void *p = early_alloc(PAGE_SIZE, nid); + void *p = early_alloc(PAGE_SIZE, nid, true); p4d_populate(&init_mm, p4d, p); } @@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, unsigned long next; if (pgd_none(*pgd)) { - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pgd_populate(&init_mm, pgd, p); } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 391b13402e40..e1d61e8500f9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ 
b/arch/x86/mm/mem_encrypt.c @@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); } -static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, - unsigned long end) +struct sme_populate_pgd_data { + void *pgtable_area; + pgd_t *pgd; + + pmdval_t pmd_flags; + pteval_t pte_flags; + unsigned long paddr; + + unsigned long vaddr; + unsigned long vaddr_end; +}; + +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; - pgd_start = start & PGDIR_MASK; - pgd_end = end & PGDIR_MASK; + pgd_start = ppd->vaddr & PGDIR_MASK; + pgd_end = ppd->vaddr_end & PGDIR_MASK; - pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); - pgd_size *= sizeof(pgd_t); + pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); - pgd_p = pgd_base + pgd_index(start); + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); memset(pgd_p, 0, pgd_size); } -#define PGD_FLAGS _KERNPG_TABLE_NOENC -#define P4D_FLAGS _KERNPG_TABLE_NOENC -#define PUD_FLAGS _KERNPG_TABLE_NOENC -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) +#define PGD_FLAGS _KERNPG_TABLE_NOENC +#define P4D_FLAGS _KERNPG_TABLE_NOENC +#define PUD_FLAGS _KERNPG_TABLE_NOENC +#define PMD_FLAGS _KERNPG_TABLE_NOENC + +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) + +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) + +#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) + +#define PTE_FLAGS_DEC PTE_FLAGS +#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) -static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, - unsigned long vaddr, pmdval_t pmd_val) +static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd_p; p4d_t *p4d_p; pud_t *pud_p; pmd_t *pmd_p; - pgd_p = pgd_base + pgd_index(vaddr); + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); if (native_pgd_val(*pgd_p)) { if (IS_ENABLED(CONFIG_X86_5LEVEL)) p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); @@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, pgd_t pgd; if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p = pgtable_area; + p4d_p = ppd->pgtable_area; memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); - pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; + ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); } else { - pud_p = pgtable_area; + pud_p = ppd->pgtable_area; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); } @@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, } if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p += p4d_index(vaddr); + p4d_p += p4d_index(ppd->vaddr); if (native_p4d_val(*p4d_p)) { pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); } else { p4d_t p4d; - pud_p = pgtable_area; + pud_p = ppd->pgtable_area; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); 
native_set_p4d(p4d_p, p4d); } } - pud_p += pud_index(vaddr); + pud_p += pud_index(ppd->vaddr); if (native_pud_val(*pud_p)) { if (native_pud_val(*pud_p) & _PAGE_PSE) - goto out; + return NULL; pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); } else { pud_t pud; - pmd_p = pgtable_area; + pmd_p = ppd->pgtable_area; memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); - pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; + ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); native_set_pud(pud_p, pud); } - pmd_p += pmd_index(vaddr); + return pmd_p; +} + +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +{ + pmd_t *pmd_p; + + pmd_p = sme_prepare_pgd(ppd); + if (!pmd_p) + return; + + pmd_p += pmd_index(ppd->vaddr); if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) - native_set_pmd(pmd_p, native_make_pmd(pmd_val)); + native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); +} -out: - return pgtable_area; +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +{ + pmd_t *pmd_p; + pte_t *pte_p; + + pmd_p = sme_prepare_pgd(ppd); + if (!pmd_p) + return; + + pmd_p += pmd_index(ppd->vaddr); + if (native_pmd_val(*pmd_p)) { + if (native_pmd_val(*pmd_p) & _PAGE_PSE) + return; + + pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); + } else { + pmd_t pmd; + + pte_p = ppd->pgtable_area; + memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); + ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; + + pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); + native_set_pmd(pmd_p, pmd); + } + + pte_p += pte_index(ppd->vaddr); + if (!native_pte_val(*pte_p)) + native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); +} + +static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd_large(ppd); + + ppd->vaddr += PMD_PAGE_SIZE; + ppd->paddr += PMD_PAGE_SIZE; + } +} + +static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd(ppd); + + ppd->vaddr += PAGE_SIZE; + ppd->paddr += PAGE_SIZE; + } +} + +static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags, pteval_t pte_flags) +{ + unsigned long vaddr_end; + + ppd->pmd_flags = pmd_flags; + ppd->pte_flags = pte_flags; + + /* Save original end value since we modify the struct value */ + vaddr_end = ppd->vaddr_end; + + /* If start is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + __sme_map_range_pte(ppd); + + /* Create PMD entries */ + ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + __sme_map_range_pmd(ppd); + + /* If end is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = vaddr_end; + __sme_map_range_pte(ppd); +} + +static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); +} + +static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); +} + +static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } static unsigned long __init sme_pgtable_calc(unsigned long len) { - unsigned long p4d_size, pud_size, pmd_size; + unsigned long p4d_size, pud_size, pmd_size, pte_size; unsigned long total; /* * Perform a relatively simplistic calculation of the pagetable - * 
entries that are needed. That mappings will be covered by 2MB - * PMD entries so we can conservatively calculate the required + * entries that are needed. Those mappings will be covered mostly + * by 2MB PMD entries so we can conservatively calculate the required * number of P4D, PUD and PMD structures needed to perform the - * mappings. Incrementing the count for each covers the case where - * the addresses cross entries. + * mappings. For mappings that are not 2MB aligned, PTE mappings + * would be needed for the start and end portion of the address range + * that fall outside of the 2MB alignment. This results in, at most, + * two extra pages to hold PTE entries for each range that is mapped. + * Incrementing the count for each covers the case where the addresses + * cross entries. */ if (IS_ENABLED(CONFIG_X86_5LEVEL)) { p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; @@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) } pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; + pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; - total = p4d_size + pud_size + pmd_size; + total = p4d_size + pud_size + pmd_size + pte_size; /* * Now calculate the added pagetable structures needed to populate @@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return total; } -void __init sme_encrypt_kernel(void) +void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; unsigned long kernel_start, kernel_end, kernel_len; + unsigned long initrd_start, initrd_end, initrd_len; + struct sme_populate_pgd_data ppd; unsigned long pgtable_area_len; - unsigned long paddr, pmd_flags; unsigned long decrypted_base; - void *pgtable_area; - pgd_t *pgd; if (!sme_active()) return; /* - * Prepare for encrypting the kernel by building new pagetables with - * the necessary attributes needed to encrypt the kernel in place. + * Prepare for encrypting the kernel and initrd by building new + * pagetables with the necessary attributes needed to encrypt the + * kernel in place. * * One range of virtual addresses will map the memory occupied - * by the kernel as encrypted. + * by the kernel and initrd as encrypted. * * Another range of virtual addresses will map the memory occupied - * by the kernel as decrypted and write-protected. + * by the kernel and initrd as decrypted and write-protected. * * The use of write-protect attribute will prevent any of the * memory from being cached. 
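 *
 * (For instance, for a kernel occupying [16MB, 32MB) physical, one
 * mapping covers that range encrypted at its identity address while
 * a second maps the same physical range decrypted and
 * write-protected at decrypted_base + 16MB; __enc_copy() then reads
 * through the decrypted mapping and writes back through the
 * encrypted one, encrypting the contents in place.)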
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void) kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); kernel_len = kernel_end - kernel_start; + initrd_start = 0; + initrd_end = 0; + initrd_len = 0; +#ifdef CONFIG_BLK_DEV_INITRD + initrd_len = (unsigned long)bp->hdr.ramdisk_size | + ((unsigned long)bp->ext_ramdisk_size << 32); + if (initrd_len) { + initrd_start = (unsigned long)bp->hdr.ramdisk_image | + ((unsigned long)bp->ext_ramdisk_image << 32); + initrd_end = PAGE_ALIGN(initrd_start + initrd_len); + initrd_len = initrd_end - initrd_start; + } +#endif + /* Set the encryption workarea to be immediately after the kernel */ workarea_start = kernel_end; @@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void) */ pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; + if (initrd_len) + pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; /* PUDs and PMDs needed in the current pagetables for the workarea */ pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); /* * The total workarea includes the executable encryption area and - * the pagetable area. + * the pagetable area. The start of the workarea is already 2MB + * aligned, align the end of the workarea on a 2MB boundary so that + * we don't try to create/allocate PTE entries from the workarea + * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = workarea_start + workarea_len; + workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); /* * Set the address to the start of where newly created pagetable @@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void) * pagetables and when the new encrypted and decrypted kernel * mappings are populated. */ - pgtable_area = (void *)execute_end; + ppd.pgtable_area = (void *)execute_end; /* * Make sure the current pagetable structure has entries for * addressing the workarea. */ - pgd = (pgd_t *)native_read_cr3_pa(); - paddr = workarea_start; - while (paddr < workarea_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + PMD_FLAGS); - - paddr += PMD_PAGE_SIZE; - } + ppd.pgd = (pgd_t *)native_read_cr3_pa(); + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); /* * A new pagetable structure is being built to allow for the kernel - * to be encrypted. It starts with an empty PGD that will then be - * populated with new PUDs and PMDs as the encrypted and decrypted - * kernel mappings are created. + * and initrd to be encrypted. It starts with an empty PGD that will + * then be populated with new PUDs and PMDs as the encrypted and + * decrypted kernel mappings are created. */ - pgd = pgtable_area; - memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); - pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; - - /* Add encrypted kernel (identity) mappings */ - pmd_flags = PMD_FLAGS | _PAGE_ENC; - paddr = kernel_start; - while (paddr < kernel_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + pmd_flags); - - paddr += PMD_PAGE_SIZE; - } + ppd.pgd = ppd.pgtable_area; + memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); + ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; /* * A different PGD index/entry must be used to get different @@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void) * the base of the mapping. 
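 *
 * (When an initrd is present, the code below additionally bumps
 * decrypted_base past the initrd's PGD entry by taking the max of
 * the two candidate bases.)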
*/ decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); + if (initrd_len) { + unsigned long check_base; + + check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); + decrypted_base = max(decrypted_base, check_base); + } decrypted_base <<= PGDIR_SHIFT; + /* Add encrypted kernel (identity) mappings */ + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start; + ppd.vaddr_end = kernel_end; + sme_map_range_encrypted(&ppd); + /* Add decrypted, write-protected kernel (non-identity) mappings */ - pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); - paddr = kernel_start; - while (paddr < kernel_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr + decrypted_base, - paddr + pmd_flags); - - paddr += PMD_PAGE_SIZE; + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); + + if (initrd_len) { + /* Add encrypted initrd (identity) mappings */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start; + ppd.vaddr_end = initrd_end; + sme_map_range_encrypted(&ppd); + /* + * Add decrypted, write-protected initrd (non-identity) mappings + */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); } /* Add decrypted workarea mappings to both kernel mappings */ - paddr = workarea_start; - while (paddr < workarea_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + PMD_FLAGS); + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr + decrypted_base, - paddr + PMD_FLAGS); - - paddr += PMD_PAGE_SIZE; - } + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_map_range_decrypted(&ppd); /* Perform the encryption */ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, - kernel_len, workarea_start, (unsigned long)pgd); + kernel_len, workarea_start, (unsigned long)ppd.pgd); + + if (initrd_len) + sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, + initrd_len, workarea_start, + (unsigned long)ppd.pgd); /* * At this point we are running encrypted. Remove the mappings for * the decrypted areas - all that is needed for this is to remove * the PGD entry/entries. 
*/ - sme_clear_pgd(pgd, kernel_start + decrypted_base, - kernel_end + decrypted_base); + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_clear_pgd(&ppd); + + if (initrd_len) { + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_clear_pgd(&ppd); + } - sme_clear_pgd(pgd, workarea_start + decrypted_base, - workarea_end + decrypted_base); + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_clear_pgd(&ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 730e6d541df1..01f682cf77a8 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute) /* * Entry parameters: - * RDI - virtual address for the encrypted kernel mapping - * RSI - virtual address for the decrypted kernel mapping - * RDX - length of kernel + * RDI - virtual address for the encrypted mapping + * RSI - virtual address for the decrypted mapping + * RDX - length to encrypt * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) @@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute) addq $PAGE_SIZE, %rax /* Workarea encryption routine */ push %r12 - movq %rdi, %r10 /* Encrypted kernel */ - movq %rsi, %r11 /* Decrypted kernel */ - movq %rdx, %r12 /* Kernel length */ + movq %rdi, %r10 /* Encrypted area */ + movq %rsi, %r11 /* Decrypted area */ + movq %rdx, %r12 /* Area length */ /* Copy encryption routine into the workarea */ movq %rax, %rdi /* Workarea encryption routine */ @@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute) rep movsb /* Setup registers for call */ - movq %r10, %rdi /* Encrypted kernel */ - movq %r11, %rsi /* Decrypted kernel */ + movq %r10, %rdi /* Encrypted area */ + movq %r11, %rsi /* Decrypted area */ movq %r8, %rdx /* Pagetables used for encryption */ - movq %r12, %rcx /* Kernel length */ + movq %r12, %rcx /* Area length */ movq %rax, %r8 /* Workarea encryption routine */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ @@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute) ENTRY(__enc_copy) /* - * Routine used to encrypt kernel. + * Routine used to encrypt memory in place. * This routine must be run outside of the kernel proper since * the kernel will be encrypted during the process. So this * routine is defined here and then copied to an area outside @@ -79,19 +79,19 @@ ENTRY(__enc_copy) * during execution. * * On entry the registers must be: - * RDI - virtual address for the encrypted kernel mapping - * RSI - virtual address for the decrypted kernel mapping + * RDI - virtual address for the encrypted mapping + * RSI - virtual address for the decrypted mapping * RDX - address of the pagetables to use for encryption - * RCX - length of kernel + * RCX - length of area * R8 - intermediate copy buffer * * RAX - points to this routine * - * The kernel will be encrypted by copying from the non-encrypted - * kernel space to an intermediate buffer and then copying from the - * intermediate buffer back to the encrypted kernel space. The physical - * addresses of the two kernel space mappings are the same which - * results in the kernel being encrypted "in place". 
+ * The area will be encrypted by copying from the non-encrypted + * memory space to an intermediate buffer and then copying from the + * intermediate buffer back to the encrypted memory space. The physical + * addresses of the two mappings are the same which results in the area + * being encrypted "in place". */ /* Enable the new page tables */ mov %rdx, %cr3 @@ -103,47 +103,55 @@ ENTRY(__enc_copy) orq $X86_CR4_PGE, %rdx mov %rdx, %cr4 + push %r15 + push %r12 + + movq %rcx, %r9 /* Save area length */ + movq %rdi, %r10 /* Save encrypted area address */ + movq %rsi, %r11 /* Save decrypted area address */ + /* Set the PAT register PA5 entry to write-protect */ - push %rcx movl $MSR_IA32_CR_PAT, %ecx rdmsr - push %rdx /* Save original PAT value */ + mov %rdx, %r15 /* Save original PAT value */ andl $0xffff00ff, %edx /* Clear PA5 */ orl $0x00000500, %edx /* Set PA5 to WP */ wrmsr - pop %rdx /* RDX contains original PAT value */ - pop %rcx - - movq %rcx, %r9 /* Save kernel length */ - movq %rdi, %r10 /* Save encrypted kernel address */ - movq %rsi, %r11 /* Save decrypted kernel address */ wbinvd /* Invalidate any cache entries */ - /* Copy/encrypt 2MB at a time */ + /* Copy/encrypt up to 2MB at a time */ + movq $PMD_PAGE_SIZE, %r12 1: - movq %r11, %rsi /* Source - decrypted kernel */ + cmpq %r12, %r9 + jnb 2f + movq %r9, %r12 + +2: + movq %r11, %rsi /* Source - decrypted area */ movq %r8, %rdi /* Dest - intermediate copy buffer */ - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ + movq %r12, %rcx rep movsb movq %r8, %rsi /* Source - intermediate copy buffer */ - movq %r10, %rdi /* Dest - encrypted kernel */ - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ + movq %r10, %rdi /* Dest - encrypted area */ + movq %r12, %rcx rep movsb - addq $PMD_PAGE_SIZE, %r11 - addq $PMD_PAGE_SIZE, %r10 - subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */ + addq %r12, %r11 + addq %r12, %r10 + subq %r12, %r9 /* Kernel length decrement */ jnz 1b /* Kernel length not zero? */ /* Restore PAT register */ - push %rdx /* Save original PAT value */ movl $MSR_IA32_CR_PAT, %ecx rdmsr - pop %rdx /* Restore original PAT value */ + mov %r15, %rdx /* Restore original PAT value */ wrmsr + pop %r12 + pop %r15 + ret .L__enc_copy_end: ENDPROC(__enc_copy) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 87f214fbe66e..5acee5139e28 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -15,8 +15,6 @@ #include <asm/set_memory.h> #include <linux/bpf.h> -int bpf_jit_enable __read_mostly; - /* * assembly code in arch/x86/net/bpf_jit.S */ @@ -154,6 +152,11 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_AX)); } +static bool is_axreg(u32 reg) +{ + return reg == BPF_REG_0; +} + /* add modifiers if 'reg' maps to x64 registers r8..r15 */ static u8 add_1mod(u8 byte, u32 reg) { @@ -447,16 +450,36 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); + /* b3 holds 'normal' opcode, b2 short form only valid + * in case dst is eax/rax. 
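Before the change above, __enc_copy assumed the length was a whole number of 2MB chunks; the reworked loop copies min(remaining, PMD_PAGE_SIZE) per pass, so page-aligned initrd lengths also work. The same logic expressed in C, as a sketch only (in the real routine the two mappings alias the same physical pages, which is why a bounce buffer is needed at all; separate buffers here just let the demo run):

    #include <string.h>

    #define PMD_PAGE_SIZE (2UL * 1024 * 1024)

    static void enc_copy(char *encrypted, const char *decrypted, size_t len,
                         char *bounce /* PMD_PAGE_SIZE bytes */)
    {
        while (len) {
            size_t chunk = len < PMD_PAGE_SIZE ? len : PMD_PAGE_SIZE;

            memcpy(bounce, decrypted, chunk);  /* read via decrypted mapping */
            memcpy(encrypted, bounce, chunk);  /* write via encrypted mapping */

            decrypted += chunk;
            encrypted += chunk;
            len -= chunk;                      /* last pass may be < 2MB */
        }
    }

    int main(void)
    {
        static char src[3 * 1024 * 1024 + 5], dst[sizeof(src)];
        static char bounce[PMD_PAGE_SIZE];

        enc_copy(dst, src, sizeof(src), bounce);  /* one full 2MB pass, then a short tail */
        return 0;
    }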
+ */ switch (BPF_OP(insn->code)) { - case BPF_ADD: b3 = 0xC0; break; - case BPF_SUB: b3 = 0xE8; break; - case BPF_AND: b3 = 0xE0; break; - case BPF_OR: b3 = 0xC8; break; - case BPF_XOR: b3 = 0xF0; break; + case BPF_ADD: + b3 = 0xC0; + b2 = 0x05; + break; + case BPF_SUB: + b3 = 0xE8; + b2 = 0x2D; + break; + case BPF_AND: + b3 = 0xE0; + b2 = 0x25; + break; + case BPF_OR: + b3 = 0xC8; + b2 = 0x0D; + break; + case BPF_XOR: + b3 = 0xF0; + b2 = 0x35; + break; } if (is_imm8(imm32)) EMIT3(0x83, add_1reg(b3, dst_reg), imm32); + else if (is_axreg(dst_reg)) + EMIT1_off32(b2, imm32); else EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); break; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index f6a26e3cb476..54ef19e90705 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -662,11 +662,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); */ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) { + static const char *name = "PCI Bus 0000:00"; + struct resource *res, *conflict; u32 base, limit, high; struct pci_dev *other; - struct resource *res; unsigned i; - int r; if (!(pci_probe & PCI_BIG_ROOT_WINDOW)) return; @@ -707,21 +707,26 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) * Allocate a 256GB window directly below the 0xfd00000000 hardware * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6). */ - res->name = "PCI Bus 0000:00"; + res->name = name; res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_WINDOW; res->start = 0xbd00000000ull; res->end = 0xfd00000000ull - 1; - r = request_resource(&iomem_resource, res); - if (r) { + conflict = request_resource_conflict(&iomem_resource, res); + if (conflict) { kfree(res); - return; - } + if (conflict->name != name) + return; - dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", - res); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + /* We are resuming from suspend; just reenable the window */ + res = conflict; + } else { + dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", + res); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + pci_bus_add_resource(dev->bus, res, 0); + } base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK; @@ -733,13 +738,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) pci_write_config_dword(dev, AMD_141b_MMIO_HIGH(i), high); pci_write_config_dword(dev, AMD_141b_MMIO_LIMIT(i), limit); pci_write_config_dword(dev, AMD_141b_MMIO_BASE(i), base); - - pci_bus_add_resource(dev->bus, res, 0); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); #endif diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8193b38a1cae..3c09122bf038 100644 --- 
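The JIT tweak above exploits a classic x86 encoding detail: ALU operations against the accumulator have a dedicated one-byte opcode (ADD 0x05, SUB 0x2D, AND 0x25, OR 0x0D, XOR 0x35 for the imm32 forms), saving the ModRM byte of the generic 0x81 /r encoding. A small standalone demonstration (buffer handling is hypothetical):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Short accumulator form: opcode + imm32, no ModRM byte (5 bytes total). */
    static int emit_alu_eax_imm32(uint8_t *buf, uint8_t op_short, int32_t imm)
    {
        buf[0] = op_short;         /* e.g. 0x05 = ADD EAX, imm32 */
        memcpy(&buf[1], &imm, 4);  /* little-endian immediate */
        return 5;                  /* the generic 0x81 /r form needs 6 bytes */
    }

    int main(void)
    {
        uint8_t buf[8];
        int i, len = emit_alu_eax_imm32(buf, 0x05, 0x12345678);

        for (i = 0; i < len; i++)
            printf("%02x ", buf[i]);  /* prints: 05 78 56 34 12 */
        printf("\n");
        return 0;
    }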
a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4449,6 +4449,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ diff --git a/drivers/base/property.c b/drivers/base/property.c index 851b1b6596a4..613ba820f545 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -16,6 +16,7 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_graph.h> +#include <linux/of_irq.h> #include <linux/property.h> #include <linux/etherdevice.h> #include <linux/phy.h> @@ -997,6 +998,32 @@ fwnode_get_next_child_node(const struct fwnode_handle *fwnode, EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); /** + * fwnode_get_next_available_child_node - Return the next + * available child node handle for a node + * @fwnode: Firmware node to find the next child node for. + * @child: Handle to one of the node's child nodes or a %NULL handle. + */ +struct fwnode_handle * +fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode, + struct fwnode_handle *child) +{ + struct fwnode_handle *next_child = child; + + if (!fwnode) + return NULL; + + do { + next_child = fwnode_get_next_child_node(fwnode, next_child); + + if (!next_child || fwnode_device_is_available(next_child)) + break; + } while (next_child); + + return next_child; +} +EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); + +/** * device_get_next_child_node - Return the next child node handle for a device * @dev: Device to find the next child node for. * @child: Handle to one of the device's child nodes or a null handle. @@ -1126,21 +1153,21 @@ enum dev_dma_attr device_get_dma_attr(struct device *dev) EXPORT_SYMBOL_GPL(device_get_dma_attr); /** - * device_get_phy_mode - Get phy mode for given device - * @dev: Pointer to the given device + * fwnode_get_phy_mode - Get phy mode for given firmware node + * @fwnode: Pointer to the given node * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ -int device_get_phy_mode(struct device *dev) +int fwnode_get_phy_mode(struct fwnode_handle *fwnode) { const char *pm; int err, i; - err = device_property_read_string(dev, "phy-mode", &pm); + err = fwnode_property_read_string(fwnode, "phy-mode", &pm); if (err < 0) - err = device_property_read_string(dev, + err = fwnode_property_read_string(fwnode, "phy-connection-type", &pm); if (err < 0) return err; @@ -1151,13 +1178,27 @@ int device_get_phy_mode(struct device *dev) return -ENODEV; } +EXPORT_SYMBOL_GPL(fwnode_get_phy_mode); + +/** + * device_get_phy_mode - Get phy mode for given device + * @dev: Pointer to the given device + * + * The function gets phy interface string from property 'phy-mode' or + * 'phy-connection-type', and return its index in phy_modes table, or errno in + * error case. 
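Usage of the new iterator above is a simple cursor loop: each call returns the next child whose firmware node reports itself available, and NULL ends the walk. A hedged kernel-context sketch, not a real driver:

    #include <linux/printk.h>
    #include <linux/property.h>

    /* Iterate only children that report themselves available: the DT
     * "status" property or ACPI _STA, depending on the backend. */
    static void walk_available_children(struct fwnode_handle *parent)
    {
        struct fwnode_handle *child = NULL;

        while ((child = fwnode_get_next_available_child_node(parent, child)))
            pr_info("found an available child node\n");
        /* In the OF backend each call also consumes the previous
         * child's reference, so no manual put is needed here. */
    }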
+ */ +int device_get_phy_mode(struct device *dev) +{ + return fwnode_get_phy_mode(dev_fwnode(dev)); +} EXPORT_SYMBOL_GPL(device_get_phy_mode); -static void *device_get_mac_addr(struct device *dev, +static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode, const char *name, char *addr, int alen) { - int ret = device_property_read_u8_array(dev, name, addr, alen); + int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen); if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr)) return addr; @@ -1165,8 +1206,8 @@ static void *device_get_mac_addr(struct device *dev, } /** - * device_get_mac_address - Get the MAC for a given device - * @dev: Pointer to the device + * fwnode_get_mac_address - Get the MAC from the firmware node + * @fwnode: Pointer to the firmware node * @addr: Address of buffer to store the MAC in * @alen: Length of the buffer pointed to by addr, should be ETH_ALEN * @@ -1187,23 +1228,60 @@ static void *device_get_mac_addr(struct device *dev, * In this case, the real MAC is in 'local-mac-address', and 'mac-address' * exists but is all zeros. */ -void *device_get_mac_address(struct device *dev, char *addr, int alen) +void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen) { char *res; - res = device_get_mac_addr(dev, "mac-address", addr, alen); + res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen); if (res) return res; - res = device_get_mac_addr(dev, "local-mac-address", addr, alen); + res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen); if (res) return res; - return device_get_mac_addr(dev, "address", addr, alen); + return fwnode_get_mac_addr(fwnode, "address", addr, alen); +} +EXPORT_SYMBOL(fwnode_get_mac_address); + +/** + * device_get_mac_address - Get the MAC for a given device + * @dev: Pointer to the device + * @addr: Address of buffer to store the MAC in + * @alen: Length of the buffer pointed to by addr, should be ETH_ALEN + */ +void *device_get_mac_address(struct device *dev, char *addr, int alen) +{ + return fwnode_get_mac_address(dev_fwnode(dev), addr, alen); } EXPORT_SYMBOL(device_get_mac_address); /** + * fwnode_irq_get - Get IRQ directly from a fwnode + * @fwnode: Pointer to the firmware node + * @index: Zero-based index of the IRQ + * + * Returns Linux IRQ number on success. Other values are determined + * accordingly to acpi_/of_ irq_get() operation. + */ +int fwnode_irq_get(struct fwnode_handle *fwnode, unsigned int index) +{ + struct device_node *of_node = to_of_node(fwnode); + struct resource res; + int ret; + + if (IS_ENABLED(CONFIG_OF) && of_node) + return of_irq_get(of_node, index); + + ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), index, &res); + if (ret) + return ret; + + return res.start; +} +EXPORT_SYMBOL(fwnode_irq_get); + +/** * device_graph_get_next_endpoint - Get next endpoint firmware node * @fwnode: Pointer to the parent firmware node * @prev: Previous endpoint node or %NULL to get the first diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index 02d78f6cecbb..ba8acca036df 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI config BCMA_DRIVER_PCI_HOSTMODE bool "Driver for PCI core working in hostmode" - depends on MIPS && BCMA_DRIVER_PCI + depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY help PCI core hostmode operation (external PCI bus). 
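The new fwnode_* helpers above let one probe path serve both DT and ACPI descriptions. A hypothetical probe fragment showing how they compose (all names in this sketch are invented):

    #include <linux/device.h>
    #include <linux/etherdevice.h>
    #include <linux/phy.h>
    #include <linux/property.h>

    static int demo_port_setup(struct device *dev, struct fwnode_handle *port)
    {
        char addr[ETH_ALEN];
        int irq, mode;

        mode = fwnode_get_phy_mode(port);  /* index into phy_modes table or -errno */
        if (mode < 0)
            return mode;

        if (!fwnode_get_mac_address(port, addr, ETH_ALEN))
            eth_random_addr((u8 *)addr);   /* no usable MAC property found */

        irq = fwnode_irq_get(port, 0);     /* of_irq_get()/acpi_irq_get() inside */
        if (irq < 0)
            return irq;

        dev_info(dev, "irq %d, phy mode %s\n", irq, phy_modes(mode));
        return 0;
    }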
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index f9042bcc27a4..7b14d6280e44 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -152,14 +152,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask, { unsigned long get_mask = 0; unsigned long set_mask = 0; - int bit = 0; - while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) { - if (gc->bgpio_dir & BIT(bit)) - set_mask |= BIT(bit); - else - get_mask |= BIT(bit); - } + /* Make sure we first clear any bits that are zero when we read the register */ + *bits &= ~*mask; + + /* Exploit the fact that we know which directions are set */ + set_mask = *mask & gc->bgpio_dir; + get_mask = *mask & ~gc->bgpio_dir; if (set_mask) *bits |= gc->read_reg(gc->reg_set) & set_mask; @@ -176,13 +175,13 @@ static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) /* * This only works if the bits in the GPIO register are in native endianness. - * It is dirt simple and fast in this case. (Also the most common case.) */ static int bgpio_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { - - *bits = gc->read_reg(gc->reg_dat) & *mask; + /* Make sure we first clear any bits that are zero when we read the register */ + *bits &= ~*mask; + *bits |= gc->read_reg(gc->reg_dat) & *mask; return 0; } @@ -196,9 +195,12 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask, unsigned long val; int bit; + /* Make sure we first clear any bits that are zero when we read the register */ + *bits &= ~*mask; + /* Create a mirrored mask */ - bit = 0; - while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) + bit = -1; + while ((bit = find_next_bit(mask, gc->ngpio, bit + 1)) < gc->ngpio) readmask |= bgpio_line2mask(gc, bit); /* Read the register */ @@ -208,8 +210,8 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask, * Mirror the result into the "bits" result, this will give line 0 * in bit 0 ... line 31 in bit 31 for a 32bit register. 
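One part of the gpio-mmio fix above is easy to miss: get_multiple() must treat *bits as read-modify-write, clearing only the requested lines before OR-ing in the register value, instead of overwriting the whole word. A standalone illustration:

    #include <stdio.h>

    static void get_multiple(unsigned long *bits, const unsigned long *mask,
                             unsigned long reg_val)
    {
        *bits &= ~*mask;           /* drop stale state for requested lines */
        *bits |= reg_val & *mask;  /* fill in the current hardware state */
    }

    int main(void)
    {
        unsigned long bits = 0xffUL, mask = 0x0fUL;

        get_multiple(&bits, &mask, 0x05UL);
        printf("0x%lx\n", bits);   /* 0xf5: upper nibble left untouched */
        return 0;
    }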
*/ - bit = 0; - while ((bit = find_next_bit(&val, gc->ngpio, bit)) != gc->ngpio) + bit = -1; + while ((bit = find_next_bit(&val, gc->ngpio, bit + 1)) < gc->ngpio) *bits |= bgpio_line2mask(gc, bit); return 0; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 123585eeb87d..50f8443641b8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1211,23 +1211,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) pipe_name(pipe)); } -static void assert_cursor(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state) -{ - bool cur_state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - I915_STATE_WARN(cur_state != state, - "cursor on pipe %c assertion failure (expected %s, current %s)\n", - pipe_name(pipe), onoff(state), onoff(cur_state)); -} -#define assert_cursor_enabled(d, p) assert_cursor(d, p, true) -#define assert_cursor_disabled(d, p) assert_cursor(d, p, false) - void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { @@ -1255,77 +1238,25 @@ void assert_pipe(struct drm_i915_private *dev_priv, pipe_name(pipe), onoff(state), onoff(cur_state)); } -static void assert_plane(struct drm_i915_private *dev_priv, - enum plane plane, bool state) +static void assert_plane(struct intel_plane *plane, bool state) { - u32 val; - bool cur_state; + bool cur_state = plane->get_hw_state(plane); - val = I915_READ(DSPCNTR(plane)); - cur_state = !!(val & DISPLAY_PLANE_ENABLE); I915_STATE_WARN(cur_state != state, - "plane %c assertion failure (expected %s, current %s)\n", - plane_name(plane), onoff(state), onoff(cur_state)); + "%s assertion failure (expected %s, current %s)\n", + plane->base.name, onoff(state), onoff(cur_state)); } -#define assert_plane_enabled(d, p) assert_plane(d, p, true) -#define assert_plane_disabled(d, p) assert_plane(d, p, false) - -static void assert_planes_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - int i; - - /* Primary planes are fixed to pipes on gen4+ */ - if (INTEL_GEN(dev_priv) >= 4) { - u32 val = I915_READ(DSPCNTR(pipe)); - I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE, - "plane %c assertion failure, should be disabled but not\n", - plane_name(pipe)); - return; - } +#define assert_plane_enabled(p) assert_plane(p, true) +#define assert_plane_disabled(p) assert_plane(p, false) - /* Need to check both planes against the pipe */ - for_each_pipe(dev_priv, i) { - u32 val = I915_READ(DSPCNTR(i)); - enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> - DISPPLANE_SEL_PIPE_SHIFT; - I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe, - "plane %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(i), pipe_name(pipe)); - } -} - -static void assert_sprites_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void assert_planes_disabled(struct intel_crtc *crtc) { - int sprite; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; - if (INTEL_GEN(dev_priv) >= 9) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(PLANE_CTL(pipe, sprite)); - I915_STATE_WARN(val & PLANE_CTL_ENABLE, - "plane %d assertion failure, should be off on pipe %c but is still active\n", - sprite, pipe_name(pipe)); - } - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - for_each_sprite(dev_priv, pipe, 
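The other half of the fix is the loop bound: find_next_bit() is inclusive of its start index, so the old form that passes the just-found bit back in re-finds the same set bit forever. Seeding with -1 and searching from bit + 1 visits each set bit exactly once. A runnable sketch with a simplified stand-in for find_next_bit():

    #include <stdio.h>

    /* Simplified stand-in for the kernel's find_next_bit(): returns the
     * index of the first set bit at or above 'start', or 'size' if none. */
    static unsigned long find_next_bit(const unsigned long *word,
                                       unsigned long size, unsigned long start)
    {
        unsigned long i;

        for (i = start; i < size; i++)
            if (*word & (1UL << i))
                return i;
        return size;
    }

    int main(void)
    {
        unsigned long mask = 0x0000000aUL;  /* bits 1 and 3 set */
        int bit = -1;

        /* Buggy form: find_next_bit(&mask, 32, bit) with bit unchanged
         * would return bit 1 forever; advancing by one each pass works. */
        while ((bit = find_next_bit(&mask, 32, bit + 1)) < 32)
            printf("line %d selected\n", bit);
        return 0;
    }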
sprite) { - u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite)); - I915_STATE_WARN(val & SP_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - sprite_name(pipe, sprite), pipe_name(pipe)); - } - } else if (INTEL_GEN(dev_priv) >= 7) { - u32 val = I915_READ(SPRCTL(pipe)); - I915_STATE_WARN(val & SPRITE_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { - u32 val = I915_READ(DVSCNTR(pipe)); - I915_STATE_WARN(val & DVS_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + assert_plane_disabled(plane); } static void assert_vblank_disabled(struct drm_crtc *crtc) @@ -1918,9 +1849,7 @@ static void intel_enable_pipe(struct intel_crtc *crtc) DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe)); - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); /* * A pipe without a PLL won't actually be able to drive bits from @@ -1989,9 +1918,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Make sure planes won't keep trying to pump pixels to us, * or we might hang the display. */ - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -2820,6 +2747,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state, crtc_state->active_planes); } +static void intel_plane_disable_noatomic(struct intel_crtc *crtc, + struct intel_plane *plane) +{ + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + intel_set_plane_visible(crtc_state, plane_state, false); + + if (plane->id == PLANE_PRIMARY) + intel_pre_disable_primary_noatomic(&crtc->base); + + trace_intel_disable_plane(&plane->base, crtc); + plane->disable_plane(plane, crtc); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2877,12 +2821,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - intel_set_plane_visible(to_intel_crtc_state(crtc_state), - to_intel_plane_state(plane_state), - false); - intel_pre_disable_primary_noatomic(&intel_crtc->base); - trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(intel_plane, intel_crtc); + intel_plane_disable_noatomic(intel_crtc, intel_plane); return; @@ -3385,6 +3324,31 @@ static void i9xx_disable_primary_plane(struct intel_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool i9xx_plane_get_hw_state(struct intel_plane *primary) +{ + + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + enum intel_display_power_domain power_domain; + enum plane plane = primary->plane; + enum pipe pipe = primary->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-4 which don't have any + * display power wells. 
+ */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { @@ -4866,7 +4830,8 @@ void hsw_enable_ips(struct intel_crtc *crtc) * a vblank wait. */ - assert_plane_enabled(dev_priv, crtc->plane); + assert_plane_enabled(to_intel_plane(crtc->base.primary)); + if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, @@ -4899,7 +4864,8 @@ void hsw_disable_ips(struct intel_crtc *crtc) if (!crtc->config->ips_enabled) return; - assert_plane_enabled(dev_priv, crtc->plane); + assert_plane_enabled(to_intel_plane(crtc->base.primary)); + if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); @@ -5899,6 +5865,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; + struct intel_plane *plane; u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; @@ -5907,11 +5874,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, if (!intel_crtc->active) return; - if (crtc->primary->state->visible) { - intel_pre_disable_primary_noatomic(crtc); + for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) { + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - crtc->primary->state->visible = false; + if (plane_state->base.visible) + intel_plane_disable_noatomic(intel_crtc, plane); } state = drm_atomic_state_alloc(crtc->dev); @@ -9477,6 +9445,23 @@ static void i845_disable_cursor(struct intel_plane *plane, i845_update_cursor(plane, NULL, NULL); } +static bool i845_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(PIPE_A); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9670,6 +9655,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane, i9xx_update_cursor(plane, NULL, NULL); } +static bool i9xx_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-3 which don't have any + * display power wells. 
+ */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} /* VESA 640x480x72Hz mode to set on the pipe */ static const struct drm_display_mode load_detect_mode = { @@ -13205,6 +13212,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skl_update_plane; primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13215,6 +13223,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skl_update_plane; primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -13222,6 +13231,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = i9xx_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } else { intel_primary_formats = i8xx_primary_formats; num_formats = ARRAY_SIZE(i8xx_primary_formats); @@ -13229,6 +13239,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = i9xx_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } if (INTEL_GEN(dev_priv) >= 9) @@ -13318,10 +13329,12 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; cursor->disable_plane = i845_disable_cursor; + cursor->get_hw_state = i845_cursor_get_hw_state; cursor->check_plane = i845_check_cursor; } else { cursor->update_plane = i9xx_update_cursor; cursor->disable_plane = i9xx_disable_cursor; + cursor->get_hw_state = i9xx_cursor_get_hw_state; cursor->check_plane = i9xx_check_cursor; } @@ -14671,8 +14684,11 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_plane_disabled(dev_priv, PLANE_A); - assert_plane_disabled(dev_priv, PLANE_B); + WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE); + WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); @@ -14683,22 +14699,36 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) POSTING_READ(DPLL(pipe)); } -static bool -intel_check_plane_mapping(struct intel_crtc *crtc) +static bool intel_plane_mapping_ok(struct intel_crtc *crtc, + struct intel_plane *primary) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val; + enum plane plane = primary->plane; + u32 val = I915_READ(DSPCNTR(plane)); - if (INTEL_INFO(dev_priv)->num_pipes == 1) - return true; + return (val & DISPLAY_PLANE_ENABLE) == 0 || + (val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe); +} - val = I915_READ(DSPCNTR(!crtc->plane)); +static void 
+intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; - if ((val & DISPLAY_PLANE_ENABLE) && - (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe)) - return false; + if (INTEL_GEN(dev_priv) >= 4) + return; - return true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + + if (intel_plane_mapping_ok(crtc, plane)) + continue; + + DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n", + plane->base.name); + intel_plane_disable_noatomic(crtc, plane); + } } static bool intel_crtc_has_encoders(struct intel_crtc *crtc) @@ -14754,33 +14784,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { - if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) - continue; + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - trace_intel_disable_plane(&plane->base, crtc); - plane->disable_plane(plane, crtc); + if (plane_state->base.visible && + plane->base.type != DRM_PLANE_TYPE_PRIMARY) + intel_plane_disable_noatomic(crtc, plane); } } - /* We need to sanitize the plane -> pipe mapping first because this will - * disable the crtc (and hence change the state) if it is wrong. Note - * that gen4+ has a fixed plane -> pipe mapping. */ - if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) { - bool plane; - - DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n", - crtc->base.base.id, crtc->base.name); - - /* Pipe has the wrong plane attached and the plane is active. - * Temporarily change the plane mapping and disable everything - * ... */ - plane = crtc->plane; - crtc->base.primary->state->visible = true; - crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base, ctx); - crtc->plane = plane; - } - /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) @@ -14885,24 +14897,21 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); } -static bool primary_get_hw_state(struct intel_plane *plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - - return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE; -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct intel_plane *primary = to_intel_plane(crtc->base.primary); - bool visible; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane; - visible = crtc->active && primary_get_hw_state(primary); + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + bool visible = plane->get_hw_state(plane); - intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), - to_intel_plane_state(primary->base.state), - visible); + intel_set_plane_visible(crtc_state, plane_state, visible); + } } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -15100,6 +15109,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* HW state is read out, now we need to sanitize this mess. 
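The refactor above replaces generation-specific register peeking with a get_hw_state() hook per plane, so the common readout code becomes one uniform loop. The shape of that pattern, distilled into a standalone sketch (names invented, state hardcoded):

    #include <stdbool.h>
    #include <stdio.h>

    struct plane {
        const char *name;
        bool (*get_hw_state)(struct plane *plane);
    };

    static bool demo_get_hw_state(struct plane *plane)
    {
        /* A real hook reads a control register (PLANE_CTL, DSPCNTR,
         * CURCNTR, ...) under a display power reference and returns
         * the enable bit; hardcoded here. */
        return false;
    }

    static void readout_plane_state(struct plane **planes, int n)
    {
        int i;

        for (i = 0; i < n; i++) {
            bool visible = planes[i]->get_hw_state(planes[i]);

            printf("%s: %s\n", planes[i]->name,
                   visible ? "enabled" : "disabled");
        }
    }

    int main(void)
    {
        struct plane primary = { "primary A", demo_get_hw_state };
        struct plane *planes[] = { &primary };

        readout_plane_state(planes, 1);
        return 0;
    }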
*/ get_encoder_power_domains(dev_priv); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6c7f8bca574e..5d77f75a9f9c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -862,6 +862,7 @@ struct intel_plane { const struct intel_plane_state *plane_state); void (*disable_plane)(struct intel_plane *plane, struct intel_crtc *crtc); + bool (*get_hw_state)(struct intel_plane *plane); int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state); @@ -1924,6 +1925,7 @@ void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); +bool skl_plane_get_hw_state(struct intel_plane *plane); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 4fcf80ca91dd..4a8a5d918a83 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -329,6 +329,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +bool +skl_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static void chv_update_csc(struct intel_plane *plane, uint32_t format) { @@ -506,6 +526,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +vlv_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -646,6 +686,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +ivb_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -777,6 +836,25 
@@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +g4x_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static int intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, @@ -1232,6 +1310,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1242,6 +1321,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1252,6 +1332,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = vlv_update_plane; intel_plane->disable_plane = vlv_disable_plane; + intel_plane->get_hw_state = vlv_plane_get_hw_state; plane_formats = vlv_plane_formats; num_plane_formats = ARRAY_SIZE(vlv_plane_formats); @@ -1267,6 +1348,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = ivb_update_plane; intel_plane->disable_plane = ivb_disable_plane; + intel_plane->get_hw_state = ivb_plane_get_hw_state; plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); @@ -1277,6 +1359,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = g4x_update_plane; intel_plane->disable_plane = g4x_disable_plane; + intel_plane->get_hw_state = g4x_plane_get_hw_state; modifiers = i9xx_plane_format_modifiers; if (IS_GEN6(dev_priv)) { diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 0760b93e9d1f..baab93398e54 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -121,6 +121,7 @@ int nv41_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv44_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv50_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int g84_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int mcp77_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gf100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gk104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gk20a_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 435ff8662cfa..ef687414969e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1447,11 +1447,13 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) args.nv50.ro = 0; args.nv50.kind = mem->kind; args.nv50.comp = mem->comp; + argc = sizeof(args.nv50); break; case NVIF_CLASS_MEM_GF100: 
args.gf100.version = 0; args.gf100.ro = 0; args.gf100.kind = mem->kind; + argc = sizeof(args.gf100); break; default: WARN_ON(1); @@ -1459,7 +1461,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) } ret = nvif_object_map_handle(&mem->mem.object, - &argc, argc, + &args, argc, &handle, &length); if (ret != 1) return ret ? ret : -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 00eeaaffeae5..08e77cd55e6e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1251,7 +1251,7 @@ nvaa_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = g84_mmu_new, + .mmu = mcp77_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1283,7 +1283,7 @@ nvac_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = g84_mmu_new, + .mmu = mcp77_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c index 9646adec57cb..243f0a5c8a62 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c @@ -73,7 +73,8 @@ static int nvkm_bar_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_bar *bar = nvkm_bar(subdev); - bar->func->bar1.fini(bar); + if (bar->func->bar1.fini) + bar->func->bar1.fini(bar); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c index b10077d38839..35878fb538f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c @@ -26,7 +26,6 @@ gk20a_bar_func = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, .bar1.init = gf100_bar_bar1_init, - .bar1.fini = gf100_bar_bar1_fini, .bar1.wait = gf100_bar_bar1_wait, .bar1.vmm = gf100_bar_bar1_vmm, .flush = g84_bar_flush, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 352a65f9371c..67ee983bb026 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -4,6 +4,7 @@ nvkm-y += nvkm/subdev/mmu/nv41.o nvkm-y += nvkm/subdev/mmu/nv44.o nvkm-y += nvkm/subdev/mmu/nv50.o nvkm-y += nvkm/subdev/mmu/g84.o +nvkm-y += nvkm/subdev/mmu/mcp77.o nvkm-y += nvkm/subdev/mmu/gf100.o nvkm-y += nvkm/subdev/mmu/gk104.o nvkm-y += nvkm/subdev/mmu/gk20a.o @@ -22,6 +23,7 @@ nvkm-y += nvkm/subdev/mmu/vmmnv04.o nvkm-y += nvkm/subdev/mmu/vmmnv41.o nvkm-y += nvkm/subdev/mmu/vmmnv44.o nvkm-y += nvkm/subdev/mmu/vmmnv50.o +nvkm-y += nvkm/subdev/mmu/vmmmcp77.o nvkm-y += nvkm/subdev/mmu/vmmgf100.o nvkm-y += nvkm/subdev/mmu/vmmgk104.o nvkm-y += nvkm/subdev/mmu/vmmgk20a.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c new file mode 100644 index 000000000000..0527b50730d9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +mcp77_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, mcp77_vmm_new, false, 0x0200 }, + .kind = nv50_mmu_kind, + .kind_sys = true, +}; + +int +mcp77_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&mcp77_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 6d8f61ea467a..da06e64d8a7d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -95,6 +95,9 @@ struct nvkm_vmm_desc { const struct nvkm_vmm_desc_func *func; }; +extern const struct nvkm_vmm_desc nv50_vmm_desc_12[]; +extern const struct nvkm_vmm_desc nv50_vmm_desc_16[]; + extern const struct nvkm_vmm_desc gk104_vmm_desc_16_12[]; extern const struct nvkm_vmm_desc gk104_vmm_desc_16_16[]; extern const struct nvkm_vmm_desc gk104_vmm_desc_17_12[]; @@ -169,6 +172,11 @@ int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, const char *, struct nvkm_vmm **); int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +int nv50_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +void nv50_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); +int nv50_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +void nv50_vmm_flush(struct nvkm_vmm *, int); + int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); @@ -200,6 +208,8 @@ int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); +int mcp77_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c new 
file mode 100644 index 000000000000..e63d984cbfd4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c @@ -0,0 +1,45 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +static const struct nvkm_vmm_func +mcp77_vmm = { + .join = nv50_vmm_join, + .part = nv50_vmm_part, + .valid = nv50_vmm_valid, + .flush = nv50_vmm_flush, + .page_block = 1 << 29, + .page = { + { 16, &nv50_vmm_desc_16[0], NVKM_VMM_PAGE_xVxx }, + { 12, &nv50_vmm_desc_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +mcp77_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&mcp77_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c index 863a2edd9861..64f75d906202 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c @@ -32,7 +32,7 @@ static inline void nv50_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) { - u64 next = addr | map->type, data; + u64 next = addr + map->type, data; u32 pten; int log2blk; @@ -69,7 +69,7 @@ nv50_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); nvkm_kmap(pt->memory); while (ptes--) { - const u64 data = *map->dma++ | map->type; + const u64 data = *map->dma++ + map->type; VMM_WO064(pt, vmm, ptei++ * 8, data); map->type += map->ctag; } @@ -163,21 +163,21 @@ nv50_vmm_pgd = { .pde = nv50_vmm_pgd_pde, }; -static const struct nvkm_vmm_desc +const struct nvkm_vmm_desc nv50_vmm_desc_12[] = { { PGT, 17, 8, 0x1000, &nv50_vmm_pgt }, { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, {} }; -static const struct nvkm_vmm_desc +const struct nvkm_vmm_desc nv50_vmm_desc_16[] = { { PGT, 13, 8, 0x1000, &nv50_vmm_pgt }, { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, {} }; -static void +void nv50_vmm_flush(struct nvkm_vmm *vmm, int level) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; @@ -223,7 +223,7 @@ nv50_vmm_flush(struct nvkm_vmm *vmm, int level) mutex_unlock(&subdev->mutex); } -static int +int nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, struct nvkm_vmm_map *map) { @@ -321,7 +321,7 @@ nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return 0; } -static void +void 
nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { struct nvkm_vmm_join *join; @@ -335,7 +335,7 @@ nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) } } -static int +int nv50_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { const u32 pd_offset = vmm->mmu->func->vmm.pd_offset; diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index dc332ea56f6c..3ecffa52c814 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -102,10 +102,13 @@ static int sun4i_tmds_determine_rate(struct clk_hw *hw, goto out; } - if (abs(rate - rounded / i) < - abs(rate - best_parent / best_div)) { + if (!best_parent || + abs(rate - rounded / i / j) < + abs(rate - best_parent / best_half / + best_div)) { best_parent = rounded; - best_div = i; + best_half = i; + best_div = j; } } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 641294aef165..fcd58145d0da 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1863,7 +1863,7 @@ u32 vmw_get_vblank_counter(struct drm_device *dev, unsigned int pipe) */ int vmw_enable_vblank(struct drm_device *dev, unsigned int pipe) { - return -ENOSYS; + return -EINVAL; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index b8a09807c5de..3824595fece1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -266,8 +266,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = { .set_property = vmw_du_connector_set_property, .destroy = vmw_ldu_connector_destroy, .reset = vmw_du_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, + .atomic_destroy_state = vmw_du_connector_destroy_state, .atomic_set_property = vmw_du_connector_atomic_set_property, .atomic_get_property = vmw_du_connector_atomic_get_property, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index bc5f6026573d..63a4cd794b73 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -420,8 +420,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = { .set_property = vmw_du_connector_set_property, .destroy = vmw_sou_connector_destroy, .reset = vmw_du_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, + .atomic_destroy_state = vmw_du_connector_destroy_state, .atomic_set_property = vmw_du_connector_atomic_set_property, .atomic_get_property = vmw_du_connector_atomic_get_property, }; diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 706164b4c5be..f7829a74140c 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -821,8 +821,12 @@ void i2c_unregister_device(struct i2c_client *client) { if (!client) return; - if (client->dev.of_node) + + if (client->dev.of_node) { of_node_clear_flag(client->dev.of_node, OF_POPULATED); + of_node_put(client->dev.of_node); + } + if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); device_unregister(&client->dev); diff --git a/drivers/i2c/i2c-core-smbus.c 
b/drivers/i2c/i2c-core-smbus.c index 4bb9927afd01..a1082c04ac5c 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -397,16 +397,17 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, the underlying bus driver */ break; case I2C_SMBUS_I2C_BLOCK_DATA: + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { + dev_err(&adapter->dev, "Invalid block %s size %d\n", + read_write == I2C_SMBUS_READ ? "read" : "write", + data->block[0]); + return -EINVAL; + } + if (read_write == I2C_SMBUS_READ) { msg[1].len = data->block[0]; } else { msg[0].len = data->block[0] + 1; - if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) { - dev_err(&adapter->dev, - "Invalid block write size %d\n", - data->block[0]); - return -EINVAL; - } for (i = 1; i <= data->block[0]; i++) msgbuf0[i] = data->block[i]; } diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index 6c51d404874b..c37aea9ac272 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->coexist) return true; - node = of_find_node_by_name(node, "codec"); + node = of_get_child_by_name(parent, "codec"); if (node) { of_node_put(node); return true; diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 5690eb7ff954..15e0d352c4cc 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev) int vddvibr_uV = 0; int error; - of_node_get(twl6040_core_dev->of_node); - twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node, + twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node, "vibra"); if (!twl6040_core_node) { dev_err(&pdev->dev, "parent of node is missing?\n"); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 579b899add26..dbe57da8c1a1 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1250,29 +1250,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL; } else { f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX_BL; } + no_data_y = SS4_MFPACKET_NO_AY_BL; f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX_BL; - no_data_y = SS4_MFPACKET_NO_AY_BL; } else { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX; } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_STD_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX; } + no_data_y = SS4_MFPACKET_NO_AY; + f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX; - no_data_y = 
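Several of the fixes above make the same substitution: of_find_node_by_name() walks the whole flattened tree starting after its argument and drops the caller's reference on that argument, so it can both match an unrelated node elsewhere and unbalance refcounts; of_get_child_by_name() matches direct children only and returns a properly referenced node. A sketch of the resulting idiom:

    #include <linux/of.h>

    /* Check for a required "codec" child of this device's node. */
    static bool has_codec_child(struct device_node *parent)
    {
        struct device_node *node = of_get_child_by_name(parent, "codec");

        if (!node)
            return false;

        of_node_put(node);  /* balance the reference taken by the lookup */
        return true;
    }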
SS4_MFPACKET_NO_AY; } f->first_mp = 0; diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index c80a7c76cb76..79b6d69d1486 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -141,10 +141,12 @@ enum SS4_PACKET_ID { #define SS4_TS_Z_V2(_b) (s8)(_b[4] & 0x7F) -#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ -#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ -#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */ -#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ +#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */ +#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */ +#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */ +#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */ /* * enum V7_PACKET_ID - defines the packet type for V7 diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ee5466a374bf..cd9f61cb3fc6 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -173,6 +173,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0046", /* X250 */ "LEN004a", /* W541 */ "LEN200f", /* T450s */ + "LEN2018", /* T460p */ NULL }; diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 4f2bb5947a4e..141ea228aac6 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -230,8 +230,10 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id) rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, "Failed to process interrupt request: %d\n", ret); - if (count) + if (count) { kfree(attn_data.data); + attn_data.data = NULL; + } if (!kfifo_is_empty(&drvdata->attn_fifo)) return rmi_irq_fn(irq, dev_id); diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c index 7ed828a51f4c..3486d9403805 100644 --- a/drivers/input/touchscreen/88pm860x-ts.c +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, int data, n, ret; if (!np) return -ENODEV; - np = of_find_node_by_name(np, "touch"); + np = of_get_child_by_name(np, "touch"); if (!np) { dev_err(&pdev->dev, "Can't find touch node\n"); return -EINVAL; @@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set tsi prebias time */ if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) { ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set prebias & prechg time of pen detect */ data = 0; @@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x); + + of_node_put(np); + return 0; + +err_put_node: + of_node_put(np); + + return -EINVAL; } #else #define pm860x_touch_dt_init(x, y, z) (-1) diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c index 8d7f9c8f2771..9642f103b726 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -13,6 +13,7 @@ #include <linux/input.h> #include 
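The twl4030-vibra, twl6040-vibra and 88pm860x-ts hunks above converge on one device-tree pattern: of_get_child_by_name() inspects only direct children and returns a counted reference, whereas of_find_node_by_name() walks the whole tree below the node and consumes the starting reference, so it can wander into unrelated nodes. A minimal sketch of the lookup with the reference balanced on every path ("codec" is just an example child name):

	#include <linux/of.h>

	/* Check for a direct child node, dropping the reference that
	 * of_get_child_by_name() takes before returning. */
	static bool has_codec_child(struct device_node *parent)
	{
		struct device_node *child = of_get_child_by_name(parent, "codec");

		if (!child)
			return false;

		of_node_put(child);	/* balance the lookup's reference */
		return true;
	}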
<linux/input/mt.h> #include <linux/input/touchscreen.h> +#include <linux/module.h> static bool touchscreen_get_prop_u32(struct device *dev, const char *property, @@ -185,3 +186,6 @@ void touchscreen_report_pos(struct input_dev *input, input_report_abs(input, multitouch ? ABS_MT_POSITION_Y : ABS_Y, y); } EXPORT_SYMBOL(touchscreen_report_pos); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Device-tree helpers functions for touchscreen devices"); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9fc12f556534..554d60394c06 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1954,10 +1954,15 @@ static int crypt_setkey(struct crypt_config *cc) /* Ignore extra keys (which are used for IV etc) */ subkey_size = crypt_subkey_size(cc); - if (crypt_integrity_hmac(cc)) + if (crypt_integrity_hmac(cc)) { + if (subkey_size < cc->key_mac_size) + return -EINVAL; + crypt_copy_authenckey(cc->authenc_key, cc->key, subkey_size - cc->key_mac_size, cc->key_mac_size); + } + for (i = 0; i < cc->tfms_count; i++) { if (crypt_integrity_hmac(cc)) r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], @@ -2053,9 +2058,6 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string ret = crypt_setkey(cc); - /* wipe the kernel key payload copy in each case */ - memset(cc->key, 0, cc->key_size * sizeof(u8)); - if (!ret) { set_bit(DM_CRYPT_KEY_VALID, &cc->flags); kzfree(cc->key_string); @@ -2523,6 +2525,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key) } } + /* wipe the kernel key payload copy */ + if (cc->key_string) + memset(cc->key, 0, cc->key_size * sizeof(u8)); + return ret; } @@ -2740,6 +2746,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->tag_pool_max_sectors * cc->on_disk_tag_size); if (!cc->tag_pool) { ti->error = "Cannot allocate integrity tags mempool"; + ret = -ENOMEM; goto bad; } @@ -2961,6 +2968,9 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) return ret; if (cc->iv_gen_ops && cc->iv_gen_ops->init) ret = cc->iv_gen_ops->init(cc); + /* wipe the kernel key payload copy */ + if (cc->key_string) + memset(cc->key, 0, cc->key_size * sizeof(u8)); return ret; } if (argc == 2 && !strcasecmp(argv[1], "wipe")) { @@ -3007,7 +3017,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 18, 0}, + .version = {1, 18, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 05c7bfd0c9d9..46d7c8749222 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2559,7 +2559,8 @@ static int create_journal(struct dm_integrity_c *ic, char **error) int r = 0; unsigned i; __u64 journal_pages, journal_desc_size, journal_tree_size; - unsigned char *crypt_data = NULL; + unsigned char *crypt_data = NULL, *crypt_iv = NULL; + struct skcipher_request *req = NULL; ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); @@ -2617,9 +2618,20 @@ static int create_journal(struct dm_integrity_c *ic, char **error) if (blocksize == 1) { struct scatterlist *sg; - SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); - unsigned char iv[ivsize]; - skcipher_request_set_tfm(req, ic->journal_crypt); + + req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); + if (!req) { + *error = "Could not allocate crypt request"; + r = -ENOMEM; + goto bad; + } + + 
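The dm-crypt hunks above defer wiping the in-kernel key copy until after crypt_setkey() and the IV init have consumed it, and wipe only when the key came from the keyring (cc->key_string set): a hex key passed on the table line must stay resident for later message handling. A minimal sketch of that order of operations, using memzero_explicit() for the wipe (the driver itself uses memset()); the setkey callback and its argument types are placeholders, not dm-crypt's:

	#include <linux/string.h>
	#include <linux/types.h>

	/* Program key material into a transform, then scrub the local copy.
	 * 'setkey' stands in for crypto_aead_setkey()/crypto_skcipher_setkey(). */
	static int program_and_wipe(int (*setkey)(const u8 *, unsigned int),
				    u8 *key, unsigned int key_size,
				    bool key_from_keyring)
	{
		int r = setkey(key, key_size);

		if (key_from_keyring)
			memzero_explicit(key, key_size);	/* wipe after use */

		return r;
	}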
crypt_iv = kmalloc(ivsize, GFP_KERNEL); + if (!crypt_iv) { + *error = "Could not allocate iv"; + r = -ENOMEM; + goto bad; + } ic->journal_xor = dm_integrity_alloc_page_list(ic); if (!ic->journal_xor) { @@ -2641,9 +2653,9 @@ static int create_journal(struct dm_integrity_c *ic, char **error) sg_set_buf(&sg[i], va, PAGE_SIZE); } sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); - memset(iv, 0x00, ivsize); + memset(crypt_iv, 0x00, ivsize); - skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); + skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv); init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) @@ -2659,10 +2671,22 @@ static int create_journal(struct dm_integrity_c *ic, char **error) crypto_free_skcipher(ic->journal_crypt); ic->journal_crypt = NULL; } else { - SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); - unsigned char iv[ivsize]; unsigned crypt_len = roundup(ivsize, blocksize); + req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); + if (!req) { + *error = "Could not allocate crypt request"; + r = -ENOMEM; + goto bad; + } + + crypt_iv = kmalloc(ivsize, GFP_KERNEL); + if (!crypt_iv) { + *error = "Could not allocate iv"; + r = -ENOMEM; + goto bad; + } + crypt_data = kmalloc(crypt_len, GFP_KERNEL); if (!crypt_data) { *error = "Unable to allocate crypt data"; @@ -2670,8 +2694,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error) goto bad; } - skcipher_request_set_tfm(req, ic->journal_crypt); - ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); if (!ic->journal_scatterlist) { *error = "Unable to allocate sg list"; @@ -2695,12 +2717,12 @@ static int create_journal(struct dm_integrity_c *ic, char **error) struct skcipher_request *section_req; __u32 section_le = cpu_to_le32(i); - memset(iv, 0x00, ivsize); + memset(crypt_iv, 0x00, ivsize); memset(crypt_data, 0x00, crypt_len); memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le))); sg_init_one(&sg, crypt_data, crypt_len); - skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); + skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv); init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) @@ -2758,6 +2780,9 @@ retest_commit_id: } bad: kfree(crypt_data); + kfree(crypt_iv); + skcipher_request_free(req); + return r; } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index d31d18d9727c..36ef284ad086 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -80,10 +80,14 @@ #define SECTOR_TO_BLOCK_SHIFT 3 /* + * For btree insert: * 3 for btree insert + * 2 for btree lookup used within space map + * For btree remove: + * 2 for shadow spine + + * 4 for rebalance 3 child node */ -#define THIN_MAX_CONCURRENT_LOCKS 5 +#define THIN_MAX_CONCURRENT_LOCKS 6 /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index f21ce6a3d4cf..58b319757b1e 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -683,23 +683,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) pn->keys[1] = rn->keys[0]; memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - /* - * rejig the spine.
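The dm-integrity hunks above drop SKCIPHER_REQUEST_ON_STACK() and the variable-length iv[ivsize] array in favour of skcipher_request_alloc() and kmalloc(), with both objects released at the shared bad: label, so create_journal() no longer puts variably-sized crypto state on the stack. A minimal sketch of that allocate/clean-up shape (the function and its body are illustrative):

	#include <crypto/skcipher.h>
	#include <linux/slab.h>

	static int crypt_with_heap_state(struct crypto_skcipher *tfm)
	{
		struct skcipher_request *req = NULL;
		u8 *iv = NULL;
		int r = -ENOMEM;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req)
			goto out;

		iv = kzalloc(crypto_skcipher_ivsize(tfm), GFP_KERNEL);
		if (!iv)
			goto out;

		/* ... skcipher_request_set_crypt(req, sg, sg, len, iv); ... */
		r = 0;
	out:
		kfree(iv);			/* both helpers accept NULL */
		skcipher_request_free(req);
		return r;
	}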
This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - + unlock_block(s->info, left); + unlock_block(s->info, right); return 0; } diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 85140c9af581..8b941f814472 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -687,6 +687,20 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host, return; } + /* For i.MX53 eSDHCv3, SYSCTL.SDCLKFS may not be set to 0. */ + if (is_imx53_esdhc(imx_data)) { + /* + * According to the i.MX53 reference manual, if DLLCTRL[10] can + * be set, then the controller is eSDHCv3, else it is eSDHCv2. + */ + val = readl(host->ioaddr + ESDHC_DLL_CTRL); + writel(val | BIT(10), host->ioaddr + ESDHC_DLL_CTRL); + temp = readl(host->ioaddr + ESDHC_DLL_CTRL); + writel(val, host->ioaddr + ESDHC_DLL_CTRL); + if (temp & BIT(10)) + pre_div = 2; + } + temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN | ESDHC_CLOCK_MASK); diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index b8029ea03307..433a14b9f731 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -264,7 +264,6 @@ static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) } /* Create payload CAIF frames. */ - pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ; while (nfrms < CFHSI_MAX_PKTS) { struct caif_payload_info *info; int hpad; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 49ed8737871f..2594f7779c6f 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1749,7 +1749,7 @@ static int m_can_plat_remove(struct platform_device *pdev) return 0; } -static int m_can_runtime_suspend(struct device *dev) +static int __maybe_unused m_can_runtime_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct m_can_priv *priv = netdev_priv(ndev); @@ -1760,7 +1760,7 @@ static int m_can_runtime_suspend(struct device *dev) return 0; } -static int m_can_runtime_resume(struct device *dev) +static int __maybe_unused m_can_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct m_can_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 18ff127020c0..dd161c5eea8e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) void *cmd_head = pcan_usb_fd_cmd_buffer(dev); int err = 0; u8 *packet_ptr; - int i, n = 1, packet_len; + int packet_len; ptrdiff_t cmd_len; /* usb device unregistered? 
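The sdhci-esdhc-imx hunk above tells eSDHCv3 from eSDHCv2 by testing whether DLLCTRL bit 10 is writable: set it, read it back, then restore the original value so the probe leaves no trace. A minimal sketch of that probe-by-write pattern over an MMIO register (the base pointer, offset and names are placeholders):

	#include <linux/io.h>

	/* Detect a controller revision by whether a register bit latches. */
	static bool reg_bit_is_writable(void __iomem *base, unsigned long off, u32 bit)
	{
		u32 orig = readl(base + off);
		u32 probed;

		writel(orig | bit, base + off);	/* try to set the bit */
		probed = readl(base + off);	/* did it stick? */
		writel(orig, base + off);	/* restore, leaving no side effect */

		return !!(probed & bit);
	}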
*/ @@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr = cmd_head; + packet_len = cmd_len; /* firmware is not able to re-assemble 512 bytes buffer in full-speed */ - if ((dev->udev->speed != USB_SPEED_HIGH) && - (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) { - packet_len = PCAN_UFD_LOSPD_PKT_SIZE; - n += cmd_len / packet_len; - } else { - packet_len = cmd_len; - } + if (unlikely(dev->udev->speed != USB_SPEED_HIGH)) + packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE); - for (i = 0; i < n; i++) { + do { err = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, PCAN_USBPRO_EP_CMDOUT), @@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr += packet_len; - } + cmd_len -= packet_len; + + if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE) + packet_len = cmd_len; + + } while (packet_len > 0); return err; } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 54cb00a27408..eb328bade225 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3999,9 +3999,11 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) out_mdio: mv88e6xxx_mdios_unregister(chip); out_g1_vtu_prob_irq: - mv88e6xxx_g1_vtu_prob_irq_free(chip); + if (chip->irq > 0) + mv88e6xxx_g1_vtu_prob_irq_free(chip); out_g1_atu_prob_irq: - mv88e6xxx_g1_atu_prob_irq_free(chip); + if (chip->irq > 0) + mv88e6xxx_g1_atu_prob_irq_free(chip); out_g2_irq: if (chip->info->g2_irqs > 0 && chip->irq > 0) mv88e6xxx_g2_irq_free(chip); diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index b97de9d36337..20d941f4273b 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -377,7 +377,7 @@ int mv88e6xxx_g1_atu_prob_irq_setup(struct mv88e6xxx_chip *chip) chip->atu_prob_irq = irq_find_mapping(chip->g1_irq.domain, MV88E6XXX_G1_STS_IRQ_ATU_PROB); if (chip->atu_prob_irq < 0) - return chip->device_irq; + return chip->atu_prob_irq; err = request_threaded_irq(chip->atu_prob_irq, NULL, mv88e6xxx_g1_atu_prob_irq_thread_fn, diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 53d58a01484a..7997961647de 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -570,7 +570,7 @@ int mv88e6xxx_g1_vtu_prob_irq_setup(struct mv88e6xxx_chip *chip) chip->vtu_prob_irq = irq_find_mapping(chip->g1_irq.domain, MV88E6XXX_G1_STS_IRQ_VTU_PROB); if (chip->vtu_prob_irq < 0) - return chip->device_irq; + return chip->vtu_prob_irq; err = request_threaded_irq(chip->vtu_prob_irq, NULL, mv88e6xxx_g1_vtu_prob_irq_thread_fn, diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile index e4ae696920ef..686f6d8c9e79 100644 --- a/drivers/net/ethernet/aquantia/atlantic/Makefile +++ b/drivers/net/ethernet/aquantia/atlantic/Makefile @@ -39,4 +39,5 @@ atlantic-objs := aq_main.o \ hw_atl/hw_atl_a0.o \ hw_atl/hw_atl_b0.o \ hw_atl/hw_atl_utils.o \ + hw_atl/hw_atl_utils_fw2x.o \ hw_atl/hw_atl_llh.o diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 105fdb958cef..0b49f1aeebd3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -65,7 +65,13 @@ /*#define AQ_CFG_MAC_ADDR_PERMANENT {0x30, 0x0E, 0xE3, 0x12, 0x34, 0x56}*/ -#define AQ_CFG_FC_MODE 3U +#define AQ_NIC_FC_OFF 0U +#define 
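The pcan_usb_fd hunk above replaces the precomputed packet count with a do/while that walks the command buffer in PCAN_UFD_LOSPD_PKT_SIZE pieces on full-speed links, shrinking the final piece to whatever remains. A minimal standalone sketch of that chunking loop, with a send() callback standing in for usb_bulk_msg() and CHUNK standing in for the packet size:

	#include <stddef.h>

	#define CHUNK 64	/* placeholder for PCAN_UFD_LOSPD_PKT_SIZE */

	static int send_chunked(int (*send)(const unsigned char *, size_t),
				const unsigned char *buf, size_t len)
	{
		size_t chunk = len < CHUNK ? len : CHUNK;
		int err = 0;

		if (!len)
			return 0;

		do {
			err = send(buf, chunk);
			if (err)
				break;

			buf += chunk;
			len -= chunk;
			if (len < CHUNK)
				chunk = len;	/* short trailing packet */
		} while (chunk > 0);

		return err;
	}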
AQ_NIC_FC_TX 1U +#define AQ_NIC_FC_RX 2U +#define AQ_NIC_FC_FULL 3U +#define AQ_NIC_FC_AUTO 4U + +#define AQ_CFG_FC_MODE AQ_NIC_FC_FULL #define AQ_CFG_SPEED_MSK 0xFFFFU /* 0xFFFFU==auto_neg */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index f79da4b5900b..d52b088ff8f0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -19,4 +19,42 @@ #include "aq_cfg.h" #include "aq_utils.h" +#define PCI_VENDOR_ID_AQUANTIA 0x1D6A + +#define AQ_DEVICE_ID_0001 0x0001 +#define AQ_DEVICE_ID_D100 0xD100 +#define AQ_DEVICE_ID_D107 0xD107 +#define AQ_DEVICE_ID_D108 0xD108 +#define AQ_DEVICE_ID_D109 0xD109 + +#define AQ_DEVICE_ID_AQC100 0x00B1 +#define AQ_DEVICE_ID_AQC107 0x07B1 +#define AQ_DEVICE_ID_AQC108 0x08B1 +#define AQ_DEVICE_ID_AQC109 0x09B1 +#define AQ_DEVICE_ID_AQC111 0x11B1 +#define AQ_DEVICE_ID_AQC112 0x12B1 + +#define AQ_DEVICE_ID_AQC100S 0x80B1 +#define AQ_DEVICE_ID_AQC107S 0x87B1 +#define AQ_DEVICE_ID_AQC108S 0x88B1 +#define AQ_DEVICE_ID_AQC109S 0x89B1 +#define AQ_DEVICE_ID_AQC111S 0x91B1 +#define AQ_DEVICE_ID_AQC112S 0x92B1 + +#define AQ_DEVICE_ID_AQC111E 0x51B1 +#define AQ_DEVICE_ID_AQC112E 0x52B1 + +#define HW_ATL_NIC_NAME "aQuantia AQtion 10Gbit Network Adapter" + +#define AQ_HWREV_ANY 0 +#define AQ_HWREV_1 1 +#define AQ_HWREV_2 2 + +#define AQ_NIC_RATE_10G BIT(0) +#define AQ_NIC_RATE_5G BIT(1) +#define AQ_NIC_RATE_5GSR BIT(2) +#define AQ_NIC_RATE_2GS BIT(3) +#define AQ_NIC_RATE_1G BIT(4) +#define AQ_NIC_RATE_100M BIT(5) + #endif /* AQ_COMMON_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 5d67f1335f4d..a2d416b24ffc 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -23,6 +23,7 @@ struct aq_hw_caps_s { u64 hw_features; u64 link_speed_msk; unsigned int hw_priv_flags; + u32 media_type; u32 rxds; u32 txds; u32 txhwb_alignment; @@ -30,7 +31,7 @@ struct aq_hw_caps_s { u32 vecs; u32 mtu; u32 mac_regs_count; - u8 ports; + u32 hw_alive_check_addr; u8 msix_irqs; u8 tcs; u8 rxd_alignment; @@ -41,7 +42,6 @@ struct aq_hw_caps_s { u8 rx_rings; bool flow_control; bool is_64_dma; - u32 fw_ver_expected; }; struct aq_hw_link_status_s { @@ -95,12 +95,15 @@ struct aq_stats_s { #define AQ_NIC_FLAGS_IS_NOT_TX_READY (AQ_NIC_FLAGS_IS_NOT_READY | \ AQ_NIC_LINK_DOWN) +#define AQ_HW_MEDIA_TYPE_TP 1U +#define AQ_HW_MEDIA_TYPE_FIBRE 2U + struct aq_hw_s { atomic_t flags; + u8 rbl_enabled:1; struct aq_nic_cfg_s *aq_nic_cfg; - struct aq_pci_func_s *aq_pci_func; + const struct aq_fw_ops *aq_fw_ops; void __iomem *mmio; - unsigned int not_ff_addr; struct aq_hw_link_status_s aq_link_status; struct hw_aq_atl_utils_mbox mbox; struct hw_atl_stats_s last_stats; @@ -119,19 +122,9 @@ struct aq_hw_s { struct aq_ring_s; struct aq_ring_param_s; -struct aq_nic_cfg_s; struct sk_buff; struct aq_hw_ops { - struct aq_hw_s *(*create)(struct aq_pci_func_s *aq_pci_func, - unsigned int port); - - void (*destroy)(struct aq_hw_s *self); - - int (*get_hw_caps)(struct aq_hw_s *self, - struct aq_hw_caps_s *aq_hw_caps, - unsigned short device, - unsigned short subsystem_device); int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring, unsigned int frags); @@ -145,15 +138,8 @@ struct aq_hw_ops { int (*hw_ring_tx_head_update)(struct aq_hw_s *self, struct aq_ring_s *aq_ring); - int (*hw_get_mac_permanent)(struct aq_hw_s *self, - u8 *mac); - int 
(*hw_set_mac_address)(struct aq_hw_s *self, u8 *mac_addr); - int (*hw_get_link_status)(struct aq_hw_s *self); - - int (*hw_set_link_speed)(struct aq_hw_s *self, u32 speed); - int (*hw_reset)(struct aq_hw_s *self); int (*hw_init)(struct aq_hw_s *self, u8 *mac_addr); @@ -207,8 +193,6 @@ struct aq_hw_ops { const struct aq_hw_caps_s *aq_hw_caps, u32 *regs_buff); - int (*hw_update_stats)(struct aq_hw_s *self); - struct aq_stats_s *(*hw_get_hw_stats)(struct aq_hw_s *self); int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); @@ -218,4 +202,20 @@ struct aq_hw_ops { int (*hw_set_power)(struct aq_hw_s *self, unsigned int power_state); }; +struct aq_fw_ops { + int (*init)(struct aq_hw_s *self); + + int (*reset)(struct aq_hw_s *self); + + int (*get_mac_permanent)(struct aq_hw_s *self, u8 *mac); + + int (*set_link_speed)(struct aq_hw_s *self, u32 speed); + + int (*set_state)(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state); + + int (*update_link_status)(struct aq_hw_s *self); + + int (*update_stats)(struct aq_hw_s *self); +}; + #endif /* AQ_HW_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c index 27e250d61da7..d526c4f19d34 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c @@ -13,6 +13,7 @@ #include "aq_hw_utils.h" #include "aq_hw.h" +#include "aq_nic.h" void aq_hw_write_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk, u32 shift, u32 val) @@ -39,7 +40,9 @@ u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg) { u32 value = readl(hw->mmio + reg); - if ((~0U) == value && (~0U) == readl(hw->mmio + hw->not_ff_addr)) + if ((~0U) == value && + (~0U) == readl(hw->mmio + + hw->aq_nic_cfg->aq_hw_caps->hw_alive_check_addr)) aq_utils_obj_set(&hw->flags, AQ_HW_FLAG_ERR_UNPLUG); return value; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h index 03b72ddbffb9..dc88a1221f1d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h @@ -35,6 +35,9 @@ do { \ } \ } while (0) +#define aq_pr_err(...) pr_err(AQ_CFG_DRV_NAME ": " __VA_ARGS__) +#define aq_pr_trace(...) 
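The aq_hw.h hunks above split firmware access out of struct aq_hw_ops into a separate const struct aq_fw_ops, which is why the atlantic Makefile grows hw_atl_utils_fw2x.o: FW 1.x and FW 2.x backends can now be selected at init time behind one interface while the MAC-level hw ops stay untouched. A minimal sketch of that two-vtable arrangement (types and handler bodies are illustrative):

	/* Firmware access behind its own ops table, selected at probe. */
	struct fw_ops {
		int (*init)(void *hw);
		int (*get_mac_permanent)(void *hw, unsigned char *mac);
		int (*update_link_status)(void *hw);
	};

	static int fw1x_init(void *hw) { (void)hw; return 0; }
	/* ... remaining fw1x handlers, and a parallel fw2x set, elided ... */

	static const struct fw_ops fw1x_ops = {
		.init = fw1x_init,
		/* .get_mac_permanent, .update_link_status, ... */
	};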
pr_info(AQ_CFG_DRV_NAME ": " __VA_ARGS__) + struct aq_hw_s; void aq_hw_write_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 887bc846375a..ba5fe8c4125d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -43,14 +43,9 @@ struct net_device *aq_ndev_alloc(void) static int aq_ndev_open(struct net_device *ndev) { - struct aq_nic_s *aq_nic = NULL; int err = 0; + struct aq_nic_s *aq_nic = netdev_priv(ndev); - aq_nic = aq_nic_alloc_hot(ndev); - if (!aq_nic) { - err = -ENOMEM; - goto err_exit; - } err = aq_nic_init(aq_nic); if (err < 0) goto err_exit; @@ -73,7 +68,6 @@ static int aq_ndev_close(struct net_device *ndev) if (err < 0) goto err_exit; aq_nic_deinit(aq_nic); - aq_nic_free_hot_resources(aq_nic); err_exit: return err; @@ -145,15 +139,13 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev) err = aq_nic_set_packet_filter(aq_nic, ndev->flags); if (err < 0) - goto err_exit; + return; if (netdev_mc_count(ndev)) { err = aq_nic_set_multicast_list(aq_nic, ndev); if (err < 0) - goto err_exit; + return; } - -err_exit:; } static const struct net_device_ops aq_ndev_ops = { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index d98251371ee4..ebbaf63eaf47 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -14,7 +14,6 @@ #include "aq_vec.h" #include "aq_hw.h" #include "aq_pci_func.h" -#include "aq_main.h" #include <linux/moduleparam.h> #include <linux/netdevice.h> @@ -61,19 +60,13 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) rss_params->indirection_table[i] = i & (num_rss_queues - 1); } -/* Fills aq_nic_cfg with valid defaults */ -static void aq_nic_cfg_init_defaults(struct aq_nic_s *self) +/* Checks hw_caps and 'corrects' aq_nic_cfg in runtime */ +void aq_nic_cfg_start(struct aq_nic_s *self) { struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; - cfg->aq_hw_caps = &self->aq_hw_caps; - - cfg->vecs = AQ_CFG_VECS_DEF; cfg->tcs = AQ_CFG_TCS_DEF; - cfg->rxds = AQ_CFG_RXDS_DEF; - cfg->txds = AQ_CFG_TXDS_DEF; - cfg->is_polling = AQ_CFG_IS_POLLING_DEF; cfg->itr = aq_itr; @@ -94,19 +87,13 @@ static void aq_nic_cfg_init_defaults(struct aq_nic_s *self) cfg->vlan_id = 0U; aq_nic_rss_init(self, cfg->num_rss_queues); -} - -/* Checks hw_caps and 'corrects' aq_nic_cfg in runtime */ -int aq_nic_cfg_start(struct aq_nic_s *self) -{ - struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; /*descriptors */ - cfg->rxds = min(cfg->rxds, cfg->aq_hw_caps->rxds); - cfg->txds = min(cfg->txds, cfg->aq_hw_caps->txds); + cfg->rxds = min(cfg->aq_hw_caps->rxds, AQ_CFG_RXDS_DEF); + cfg->txds = min(cfg->aq_hw_caps->txds, AQ_CFG_TXDS_DEF); /*rss rings */ - cfg->vecs = min(cfg->vecs, cfg->aq_hw_caps->vecs); + cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF); cfg->vecs = min(cfg->vecs, num_online_cpus()); /* cfg->vecs should be power of 2 for RSS */ if (cfg->vecs >= 8U) @@ -120,23 +107,22 @@ int aq_nic_cfg_start(struct aq_nic_s *self) cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF); - cfg->irq_type = aq_pci_func_get_irq_type(self->aq_pci_func); + cfg->irq_type = aq_pci_func_get_irq_type(self); if ((cfg->irq_type == AQ_HW_IRQ_LEGACY) || - (self->aq_hw_caps.vecs == 1U) || + (cfg->aq_hw_caps->vecs == 1U) || (cfg->vecs == 1U)) { cfg->is_rss = 0U; cfg->vecs = 1U; } - 
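aq_nic_cfg_start() above clamps the vector count to a power of two because the RSS indirection table is filled with i & (num_rss_queues - 1), and that mask only works for power-of-two queue counts. The driver's cascading if chain (partially visible above) is equivalent to this sketch:

	/* Round v down to a power of two (v >= 1), matching the driver's
	 * if (vecs >= 8) vecs = 8; else if (vecs >= 4) ... cascade. */
	static unsigned int pow2_clamp(unsigned int v)
	{
		unsigned int p = 1;

		while (p * 2 <= v)
			p *= 2;
		return p;	/* e.g. 6 -> 4, 8 -> 8, 3 -> 2 */
	}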
cfg->link_speed_msk &= self->aq_hw_caps.link_speed_msk; - cfg->hw_features = self->aq_hw_caps.hw_features; - return 0; + cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; + cfg->hw_features = cfg->aq_hw_caps->hw_features; } static int aq_nic_update_link_status(struct aq_nic_s *self) { - int err = self->aq_hw_ops.hw_get_link_status(self->aq_hw); + int err = self->aq_fw_ops->update_link_status(self->aq_hw); if (err) return err; @@ -178,8 +164,8 @@ static void aq_nic_service_timer_cb(struct timer_list *t) if (err) goto err_exit; - if (self->aq_hw_ops.hw_update_stats) - self->aq_hw_ops.hw_update_stats(self->aq_hw); + if (self->aq_fw_ops->update_stats) + self->aq_fw_ops->update_stats(self->aq_hw); aq_nic_update_ndev_stats(self); @@ -205,51 +191,6 @@ static void aq_nic_polling_timer_cb(struct timer_list *t) AQ_CFG_POLLING_TIMER_INTERVAL); } -struct aq_nic_s *aq_nic_alloc_cold(struct pci_dev *pdev, - struct aq_pci_func_s *aq_pci_func, - unsigned int port, - const struct aq_hw_ops *aq_hw_ops) -{ - struct net_device *ndev = NULL; - struct aq_nic_s *self = NULL; - int err = 0; - - ndev = aq_ndev_alloc(); - if (!ndev) { - err = -ENOMEM; - goto err_exit; - } - - self = netdev_priv(ndev); - - SET_NETDEV_DEV(ndev, &pdev->dev); - - ndev->if_port = port; - self->ndev = ndev; - - self->aq_pci_func = aq_pci_func; - - self->aq_hw_ops = *aq_hw_ops; - self->port = (u8)port; - - self->aq_hw = self->aq_hw_ops.create(aq_pci_func, self->port); - self->aq_hw->aq_nic_cfg = &self->aq_nic_cfg; - - err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps, - pdev->device, pdev->subsystem_device); - if (err < 0) - goto err_exit; - - aq_nic_cfg_init_defaults(self); - -err_exit: - if (err < 0) { - aq_nic_free_hot_resources(self); - self = NULL; - } - return self; -} - int aq_nic_ndev_register(struct aq_nic_s *self) { int err = 0; @@ -258,9 +199,14 @@ int aq_nic_ndev_register(struct aq_nic_s *self) err = -EINVAL; goto err_exit; } - err = self->aq_hw_ops.hw_get_mac_permanent(self->aq_hw, + + err = hw_atl_utils_initfw(self->aq_hw, &self->aq_fw_ops); + if (err) + goto err_exit; + + err = self->aq_fw_ops->get_mac_permanent(self->aq_hw, self->ndev->dev_addr); - if (err < 0) + if (err) goto err_exit; #if defined(AQ_CFG_MAC_ADDR_PERMANENT) @@ -271,19 +217,29 @@ int aq_nic_ndev_register(struct aq_nic_s *self) } #endif + for (self->aq_vecs = 0; self->aq_vecs < aq_nic_get_cfg(self)->vecs; + self->aq_vecs++) { + self->aq_vec[self->aq_vecs] = + aq_vec_alloc(self, self->aq_vecs, aq_nic_get_cfg(self)); + if (!self->aq_vec[self->aq_vecs]) { + err = -ENOMEM; + goto err_exit; + } + } + netif_carrier_off(self->ndev); netif_tx_disable(self->ndev); err = register_netdev(self->ndev); - if (err < 0) + if (err) goto err_exit; err_exit: return err; } -int aq_nic_ndev_init(struct aq_nic_s *self) +void aq_nic_ndev_init(struct aq_nic_s *self) { const struct aq_hw_caps_s *aq_hw_caps = self->aq_nic_cfg.aq_hw_caps; struct aq_nic_cfg_s *aq_nic_cfg = &self->aq_nic_cfg; @@ -292,62 +248,8 @@ int aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->features = aq_hw_caps->hw_features; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN; - self->ndev->max_mtu = self->aq_hw_caps.mtu - ETH_FCS_LEN - ETH_HLEN; + self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN; - return 0; -} - -void aq_nic_ndev_free(struct aq_nic_s *self) -{ - if (!self->ndev) - goto err_exit; - - if (self->ndev->reg_state == NETREG_REGISTERED) - unregister_netdev(self->ndev); - - if (self->aq_hw) - 
self->aq_hw_ops.destroy(self->aq_hw); - - free_netdev(self->ndev); - -err_exit:; -} - -struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev) -{ - struct aq_nic_s *self = NULL; - int err = 0; - - if (!ndev) { - err = -EINVAL; - goto err_exit; - } - self = netdev_priv(ndev); - - if (!self) { - err = -EINVAL; - goto err_exit; - } - if (netif_running(ndev)) - netif_tx_disable(ndev); - netif_carrier_off(self->ndev); - - for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs; - self->aq_vecs++) { - self->aq_vec[self->aq_vecs] = - aq_vec_alloc(self, self->aq_vecs, &self->aq_nic_cfg); - if (!self->aq_vec[self->aq_vecs]) { - err = -ENOMEM; - goto err_exit; - } - } - -err_exit: - if (err < 0) { - aq_nic_free_hot_resources(self); - self = NULL; - } - return self; } void aq_nic_set_tx_ring(struct aq_nic_s *self, unsigned int idx, @@ -368,18 +270,20 @@ int aq_nic_init(struct aq_nic_s *self) unsigned int i = 0U; self->power_state = AQ_HW_POWER_STATE_D0; - err = self->aq_hw_ops.hw_reset(self->aq_hw); + err = self->aq_hw_ops->hw_reset(self->aq_hw); if (err < 0) goto err_exit; - err = self->aq_hw_ops.hw_init(self->aq_hw, - aq_nic_get_ndev(self)->dev_addr); + err = self->aq_hw_ops->hw_init(self->aq_hw, + aq_nic_get_ndev(self)->dev_addr); if (err < 0) goto err_exit; for (i = 0U, aq_vec = self->aq_vec[0]; self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) - aq_vec_init(aq_vec, &self->aq_hw_ops, self->aq_hw); + aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw); + + netif_carrier_off(self->ndev); err_exit: return err; @@ -391,13 +295,13 @@ int aq_nic_start(struct aq_nic_s *self) int err = 0; unsigned int i = 0U; - err = self->aq_hw_ops.hw_multicast_list_set(self->aq_hw, + err = self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, self->mc_list.ar, self->mc_list.count); if (err < 0) goto err_exit; - err = self->aq_hw_ops.hw_packet_filter_set(self->aq_hw, + err = self->aq_hw_ops->hw_packet_filter_set(self->aq_hw, self->packet_filter); if (err < 0) goto err_exit; @@ -409,7 +313,7 @@ int aq_nic_start(struct aq_nic_s *self) goto err_exit; } - err = self->aq_hw_ops.hw_start(self->aq_hw); + err = self->aq_hw_ops->hw_start(self->aq_hw); if (err < 0) goto err_exit; @@ -427,14 +331,14 @@ int aq_nic_start(struct aq_nic_s *self) } else { for (i = 0U, aq_vec = self->aq_vec[0]; self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { - err = aq_pci_func_alloc_irq(self->aq_pci_func, i, + err = aq_pci_func_alloc_irq(self, i, self->ndev->name, aq_vec, - aq_vec_get_affinity_mask(aq_vec)); + aq_vec_get_affinity_mask(aq_vec)); if (err < 0) goto err_exit; } - err = self->aq_hw_ops.hw_irq_enable(self->aq_hw, + err = self->aq_hw_ops->hw_irq_enable(self->aq_hw, AQ_CFG_IRQ_MASK); if (err < 0) goto err_exit; @@ -619,9 +523,8 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) frags = aq_nic_map_skb(self, skb, ring); if (likely(frags)) { - err = self->aq_hw_ops.hw_ring_tx_xmit(self->aq_hw, - ring, - frags); + err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, + ring, frags); if (err >= 0) { ++ring->stats.tx.packets; ring->stats.tx.bytes += skb->len; @@ -636,14 +539,14 @@ err_exit: int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self) { - return self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw); + return self->aq_hw_ops->hw_interrupt_moderation_set(self->aq_hw); } int aq_nic_set_packet_filter(struct aq_nic_s *self, unsigned int flags) { int err = 0; - err = self->aq_hw_ops.hw_packet_filter_set(self->aq_hw, flags); + err = self->aq_hw_ops->hw_packet_filter_set(self->aq_hw, flags); if (err < 0) goto 
err_exit; @@ -675,11 +578,11 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) * multicast mask */ self->packet_filter |= IFF_ALLMULTI; - self->aq_hw->aq_nic_cfg->mc_list_count = 0; - return self->aq_hw_ops.hw_packet_filter_set(self->aq_hw, - self->packet_filter); + self->aq_nic_cfg.mc_list_count = 0; + return self->aq_hw_ops->hw_packet_filter_set(self->aq_hw, + self->packet_filter); } else { - return self->aq_hw_ops.hw_multicast_list_set(self->aq_hw, + return self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, self->mc_list.ar, self->mc_list.count); } @@ -694,7 +597,7 @@ int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu) int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev) { - return self->aq_hw_ops.hw_set_mac_address(self->aq_hw, ndev->dev_addr); + return self->aq_hw_ops->hw_set_mac_address(self->aq_hw, ndev->dev_addr); } unsigned int aq_nic_get_link_speed(struct aq_nic_s *self) @@ -709,8 +612,9 @@ int aq_nic_get_regs(struct aq_nic_s *self, struct ethtool_regs *regs, void *p) regs->version = 1; - err = self->aq_hw_ops.hw_get_regs(self->aq_hw, - &self->aq_hw_caps, regs_buff); + err = self->aq_hw_ops->hw_get_regs(self->aq_hw, + self->aq_nic_cfg.aq_hw_caps, + regs_buff); if (err < 0) goto err_exit; @@ -720,7 +624,7 @@ err_exit: int aq_nic_get_regs_count(struct aq_nic_s *self) { - return self->aq_hw_caps.mac_regs_count; + return self->aq_nic_cfg.aq_hw_caps->mac_regs_count; } void aq_nic_get_stats(struct aq_nic_s *self, u64 *data) @@ -728,7 +632,7 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data) unsigned int i = 0U; unsigned int count = 0U; struct aq_vec_s *aq_vec = NULL; - struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw); + struct aq_stats_s *stats = self->aq_hw_ops->hw_get_hw_stats(self->aq_hw); if (!stats) goto err_exit; @@ -759,7 +663,6 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data) i++; data += i; - count = 0U; for (i = 0U, aq_vec = self->aq_vec[0]; aq_vec && self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { @@ -773,7 +676,7 @@ err_exit:; static void aq_nic_update_ndev_stats(struct aq_nic_s *self) { struct net_device *ndev = self->ndev; - struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw); + struct aq_stats_s *stats = self->aq_hw_ops->hw_get_hw_stats(self->aq_hw); ndev->stats.rx_packets = stats->uprc + stats->mprc + stats->bprc; ndev->stats.rx_bytes = stats->ubrc + stats->mbrc + stats->bbrc; @@ -787,39 +690,46 @@ static void aq_nic_update_ndev_stats(struct aq_nic_s *self) void aq_nic_get_link_ksettings(struct aq_nic_s *self, struct ethtool_link_ksettings *cmd) { - cmd->base.port = PORT_TP; + if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_FIBRE) + cmd->base.port = PORT_FIBRE; + else + cmd->base.port = PORT_TP; /* This driver supports only 10G capable adapters, so DUPLEX_FULL */ cmd->base.duplex = DUPLEX_FULL; cmd->base.autoneg = self->aq_nic_cfg.is_autoneg; ethtool_link_ksettings_zero_link_mode(cmd, supported); - if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_10G) + if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_10G) ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full); - if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_5G) + if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_5G) ethtool_link_ksettings_add_link_mode(cmd, supported, 5000baseT_Full); - if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_2GS) + if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_2GS) 
ethtool_link_ksettings_add_link_mode(cmd, supported, 2500baseT_Full); - if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_1G) + if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_1G) ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full); - if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_100M) + if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_100M) ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Full); - if (self->aq_hw_caps.flow_control) + if (self->aq_nic_cfg.aq_hw_caps->flow_control) ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + + if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_FIBRE) + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + else + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); ethtool_link_ksettings_zero_link_mode(cmd, advertising); @@ -850,7 +760,10 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self, ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); + if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_FIBRE) + ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); + else + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); } int aq_nic_set_link_ksettings(struct aq_nic_s *self, @@ -861,7 +774,7 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, int err = 0; if (cmd->base.autoneg == AUTONEG_ENABLE) { - rate = self->aq_hw_caps.link_speed_msk; + rate = self->aq_nic_cfg.aq_hw_caps->link_speed_msk; self->aq_nic_cfg.is_autoneg = true; } else { speed = cmd->base.speed; @@ -892,7 +805,7 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, goto err_exit; break; } - if (!(self->aq_hw_caps.link_speed_msk & rate)) { + if (!(self->aq_nic_cfg.aq_hw_caps->link_speed_msk & rate)) { err = -1; goto err_exit; } @@ -900,7 +813,7 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, self->aq_nic_cfg.is_autoneg = false; } - err = self->aq_hw_ops.hw_set_link_speed(self->aq_hw, rate); + err = self->aq_fw_ops->set_link_speed(self->aq_hw, rate); if (err < 0) goto err_exit; @@ -919,7 +832,7 @@ u32 aq_nic_get_fw_version(struct aq_nic_s *self) { u32 fw_version = 0U; - self->aq_hw_ops.hw_get_fw_version(self->aq_hw, &fw_version); + self->aq_hw_ops->hw_get_fw_version(self->aq_hw, &fw_version); return fw_version; } @@ -934,18 +847,18 @@ int aq_nic_stop(struct aq_nic_s *self) del_timer_sync(&self->service_timer); - self->aq_hw_ops.hw_irq_disable(self->aq_hw, AQ_CFG_IRQ_MASK); + self->aq_hw_ops->hw_irq_disable(self->aq_hw, AQ_CFG_IRQ_MASK); if (self->aq_nic_cfg.is_polling) del_timer_sync(&self->polling_timer); else - aq_pci_func_free_irqs(self->aq_pci_func); + aq_pci_func_free_irqs(self); for (i = 0U, aq_vec = self->aq_vec[0]; self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) aq_vec_stop(aq_vec); - return self->aq_hw_ops.hw_stop(self->aq_hw); + return self->aq_hw_ops->hw_stop(self->aq_hw); } void aq_nic_deinit(struct aq_nic_s *self) @@ -961,16 +874,16 @@ void aq_nic_deinit(struct aq_nic_s *self) aq_vec_deinit(aq_vec); if (self->power_state == AQ_HW_POWER_STATE_D0) { - (void)self->aq_hw_ops.hw_deinit(self->aq_hw); + (void)self->aq_hw_ops->hw_deinit(self->aq_hw); } else { - (void)self->aq_hw_ops.hw_set_power(self->aq_hw, + (void)self->aq_hw_ops->hw_set_power(self->aq_hw, self->power_state); } err_exit:; } -void aq_nic_free_hot_resources(struct aq_nic_s *self) 
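The ethtool hunks above key everything off the new media_type capability: fibre boards report PORT_FIBRE and advertise the FIBRE link mode, everything else stays PORT_TP, replacing the old hard-coded twisted-pair assumption. A one-line sketch of the mapping (the constants mirror AQ_HW_MEDIA_TYPE_*):

	#include <linux/ethtool.h>

	#define MEDIA_TP    1U	/* AQ_HW_MEDIA_TYPE_TP */
	#define MEDIA_FIBRE 2U	/* AQ_HW_MEDIA_TYPE_FIBRE */

	static u8 port_for_media(u32 media_type)
	{
		return media_type == MEDIA_FIBRE ? PORT_FIBRE : PORT_TP;
	}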
+void aq_nic_free_vectors(struct aq_nic_s *self) { unsigned int i = 0U; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 1cd7d728e91b..d16b0f1a95aa 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -17,24 +17,10 @@ #include "aq_hw.h" struct aq_ring_s; -struct aq_pci_func_s; struct aq_hw_ops; struct aq_fw_s; struct aq_vec_s; -#define AQ_NIC_FC_OFF 0U -#define AQ_NIC_FC_TX 1U -#define AQ_NIC_FC_RX 2U -#define AQ_NIC_FC_FULL 3U -#define AQ_NIC_FC_AUTO 4U - -#define AQ_NIC_RATE_10G BIT(0) -#define AQ_NIC_RATE_5G BIT(1) -#define AQ_NIC_RATE_5GSR BIT(2) -#define AQ_NIC_RATE_2GS BIT(3) -#define AQ_NIC_RATE_1G BIT(4) -#define AQ_NIC_RATE_100M BIT(5) - struct aq_nic_cfg_s { const struct aq_hw_caps_s *aq_hw_caps; u64 hw_features; @@ -77,13 +63,12 @@ struct aq_nic_s { struct aq_ring_s *aq_ring_tx[AQ_CFG_VECS_MAX * AQ_CFG_TCS_MAX]; struct aq_hw_s *aq_hw; struct net_device *ndev; - struct aq_pci_func_s *aq_pci_func; unsigned int aq_vecs; unsigned int packet_filter; unsigned int power_state; u8 port; - struct aq_hw_ops aq_hw_ops; - struct aq_hw_caps_s aq_hw_caps; + const struct aq_hw_ops *aq_hw_ops; + const struct aq_fw_ops *aq_fw_ops; struct aq_nic_cfg_s aq_nic_cfg; struct timer_list service_timer; struct timer_list polling_timer; @@ -102,18 +87,13 @@ static inline struct device *aq_nic_get_dev(struct aq_nic_s *self) return self->ndev->dev.parent; } -struct aq_nic_s *aq_nic_alloc_cold(struct pci_dev *pdev, - struct aq_pci_func_s *aq_pci_func, - unsigned int port, - const struct aq_hw_ops *aq_hw_ops); -int aq_nic_ndev_init(struct aq_nic_s *self); +void aq_nic_ndev_init(struct aq_nic_s *self); struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev); void aq_nic_set_tx_ring(struct aq_nic_s *self, unsigned int idx, struct aq_ring_s *ring); -struct device *aq_nic_get_dev(struct aq_nic_s *self); struct net_device *aq_nic_get_ndev(struct aq_nic_s *self); int aq_nic_init(struct aq_nic_s *self); -int aq_nic_cfg_start(struct aq_nic_s *self); +void aq_nic_cfg_start(struct aq_nic_s *self); int aq_nic_ndev_register(struct aq_nic_s *self); void aq_nic_ndev_free(struct aq_nic_s *self); int aq_nic_start(struct aq_nic_s *self); @@ -124,6 +104,7 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data); int aq_nic_stop(struct aq_nic_s *self); void aq_nic_deinit(struct aq_nic_s *self); void aq_nic_free_hot_resources(struct aq_nic_s *self); +void aq_nic_free_vectors(struct aq_nic_s *self); int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu); int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev); int aq_nic_set_packet_filter(struct aq_nic_s *self, unsigned int flags); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 78ef7d2deffe..22889fc158f2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -12,201 +12,132 @@ #include <linux/interrupt.h> #include <linux/module.h> -#include "aq_pci_func.h" +#include "aq_main.h" #include "aq_nic.h" #include "aq_vec.h" #include "aq_hw.h" +#include "aq_pci_func.h" #include "hw_atl/hw_atl_a0.h" #include "hw_atl/hw_atl_b0.h" -struct aq_pci_func_s { - struct pci_dev *pdev; - struct aq_nic_s *port[AQ_CFG_PCI_FUNC_PORTS]; - void __iomem *mmio; - void *aq_vec[AQ_CFG_PCI_FUNC_MSIX_IRQS]; - resource_size_t mmio_pa; - unsigned int msix_entry_mask; - unsigned int ports; - bool is_pci_enabled; - 
bool is_regions; - bool is_pci_using_dac; - struct aq_hw_caps_s aq_hw_caps; -}; - static const struct pci_device_id aq_pci_tbl[] = { - { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_0001), }, - { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D100), }, - { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D107), }, - { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D108), }, - { PCI_VDEVICE(AQUANTIA, HW_ATL_DEVICE_ID_D109), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_0001), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_D100), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_D107), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_D108), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_D109), }, + + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC100), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC107), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC108), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC109), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC111), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC112), }, + + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC100S), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC107S), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC108S), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC109S), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC111S), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC112S), }, + + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC111E), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC112E), }, + {} }; +static const struct aq_board_revision_s hw_atl_boards[] = { + { AQ_DEVICE_ID_0001, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc107, }, + { AQ_DEVICE_ID_D100, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc100, }, + { AQ_DEVICE_ID_D107, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc107, }, + { AQ_DEVICE_ID_D108, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc108, }, + { AQ_DEVICE_ID_D109, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc109, }, + + { AQ_DEVICE_ID_0001, AQ_HWREV_2, &hw_atl_ops_b0, &hw_atl_b0_caps_aqc107, }, + { AQ_DEVICE_ID_D100, AQ_HWREV_2, &hw_atl_ops_b0, &hw_atl_b0_caps_aqc100, }, + { AQ_DEVICE_ID_D107, AQ_HWREV_2, &hw_atl_ops_b0, &hw_atl_b0_caps_aqc107, }, + { AQ_DEVICE_ID_D108, AQ_HWREV_2, &hw_atl_ops_b0, &hw_atl_b0_caps_aqc108, }, + { AQ_DEVICE_ID_D109, AQ_HWREV_2, &hw_atl_ops_b0, &hw_atl_b0_caps_aqc109, }, + + { AQ_DEVICE_ID_AQC100, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc107, }, + { AQ_DEVICE_ID_AQC107, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc107, }, + { AQ_DEVICE_ID_AQC108, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc108, }, + { AQ_DEVICE_ID_AQC109, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc109, }, + { AQ_DEVICE_ID_AQC111, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc111, }, + { AQ_DEVICE_ID_AQC112, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc112, }, + + { AQ_DEVICE_ID_AQC100S, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc100s, }, + { AQ_DEVICE_ID_AQC107S, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc107s, }, + { AQ_DEVICE_ID_AQC108S, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc108s, }, + { AQ_DEVICE_ID_AQC109S, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc109s, }, + { AQ_DEVICE_ID_AQC111S, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc111s, }, + { AQ_DEVICE_ID_AQC112S, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc112s, }, + + { AQ_DEVICE_ID_AQC111E, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc111e, }, + { AQ_DEVICE_ID_AQC112E, AQ_HWREV_ANY, &hw_atl_ops_b1, &hw_atl_b0_caps_aqc112e, }, +}; + MODULE_DEVICE_TABLE(pci, aq_pci_tbl); -static const struct aq_hw_ops *aq_pci_probe_get_hw_ops_by_id(struct pci_dev *pdev) +static int 
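The new hw_atl_boards[] table above replaces the old per-generation get_ops_by_id() probing: one entry per device id, with AQ_HWREV_ANY acting as a wildcard so B1 parts match regardless of PCI revision. A minimal sketch of the lookup it feeds (struct and names simplified):

	#include <stddef.h>

	struct board {
		unsigned short devid;
		unsigned short rev;	/* 0 plays the AQ_HWREV_ANY role */
		const void *ops;
		const void *caps;
	};

	static const struct board *find_board(const struct board *tbl,
					      unsigned int n,
					      unsigned short devid,
					      unsigned short rev)
	{
		unsigned int i;

		for (i = 0; i < n; i++)
			if (tbl[i].devid == devid &&
			    (tbl[i].rev == 0 || tbl[i].rev == rev))
				return &tbl[i];

		return NULL;	/* unknown board: probe fails with -EINVAL */
	}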
aq_pci_probe_get_hw_by_id(struct pci_dev *pdev, + const struct aq_hw_ops **ops, + const struct aq_hw_caps_s **caps) { - const struct aq_hw_ops *ops = NULL; - - ops = hw_atl_a0_get_ops_by_id(pdev); - if (!ops) - ops = hw_atl_b0_get_ops_by_id(pdev); + int i = 0; - return ops; -} + if (pdev->vendor != PCI_VENDOR_ID_AQUANTIA) + return -EINVAL; -struct aq_pci_func_s *aq_pci_func_alloc(const struct aq_hw_ops *aq_hw_ops, - struct pci_dev *pdev) -{ - struct aq_pci_func_s *self = NULL; - int err = 0; - unsigned int port = 0U; - - if (!aq_hw_ops) { - err = -EFAULT; - goto err_exit; - } - self = kzalloc(sizeof(*self), GFP_KERNEL); - if (!self) { - err = -ENOMEM; - goto err_exit; - } - - pci_set_drvdata(pdev, self); - self->pdev = pdev; - - err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps, pdev->device, - pdev->subsystem_device); - if (err < 0) - goto err_exit; - - self->ports = self->aq_hw_caps.ports; - - for (port = 0; port < self->ports; ++port) { - struct aq_nic_s *aq_nic = aq_nic_alloc_cold(pdev, self, - port, aq_hw_ops); - - if (!aq_nic) { - err = -ENOMEM; - goto err_exit; + for (i = 0; i < ARRAY_SIZE(hw_atl_boards); i++) { + if (hw_atl_boards[i].devid == pdev->device && + (hw_atl_boards[i].revision == AQ_HWREV_ANY || + hw_atl_boards[i].revision == pdev->revision)) { + *ops = hw_atl_boards[i].ops; + *caps = hw_atl_boards[i].caps; + break; } - self->port[port] = aq_nic; } -err_exit: - if (err < 0) { - if (self) - aq_pci_func_free(self); - self = NULL; - } + if (i == ARRAY_SIZE(hw_atl_boards)) + return -EINVAL; - (void)err; - return self; + return 0; } -int aq_pci_func_init(struct aq_pci_func_s *self) +int aq_pci_func_init(struct pci_dev *pdev) { int err = 0; - unsigned int bar = 0U; - unsigned int port = 0U; - unsigned int numvecs = 0U; - - err = pci_enable_device(self->pdev); - if (err < 0) - goto err_exit; - - self->is_pci_enabled = true; - err = pci_set_dma_mask(self->pdev, DMA_BIT_MASK(64)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (!err) { - err = pci_set_consistent_dma_mask(self->pdev, DMA_BIT_MASK(64)); - self->is_pci_using_dac = 1; + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + } if (err) { - err = pci_set_dma_mask(self->pdev, DMA_BIT_MASK(32)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (!err) - err = pci_set_consistent_dma_mask(self->pdev, + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - self->is_pci_using_dac = 0; } if (err != 0) { err = -ENOSR; goto err_exit; } - err = pci_request_regions(self->pdev, AQ_CFG_DRV_NAME "_mmio"); + err = pci_request_regions(pdev, AQ_CFG_DRV_NAME "_mmio"); if (err < 0) goto err_exit; - self->is_regions = true; - - pci_set_master(self->pdev); - - for (bar = 0; bar < 4; ++bar) { - if (IORESOURCE_MEM & pci_resource_flags(self->pdev, bar)) { - resource_size_t reg_sz; - - self->mmio_pa = pci_resource_start(self->pdev, bar); - if (self->mmio_pa == 0U) { - err = -EIO; - goto err_exit; - } - - reg_sz = pci_resource_len(self->pdev, bar); - if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) { - err = -EIO; - goto err_exit; - } - - self->mmio = ioremap_nocache(self->mmio_pa, reg_sz); - if (!self->mmio) { - err = -EIO; - goto err_exit; - } - break; - } - } - - numvecs = min((u8)AQ_CFG_VECS_DEF, self->aq_hw_caps.msix_irqs); - numvecs = min(numvecs, num_online_cpus()); - - /* enable interrupts */ -#if !AQ_CFG_FORCE_LEGACY_INT - err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs, PCI_IRQ_MSIX); + pci_set_master(pdev); - if (err < 0) { - err = pci_alloc_irq_vectors(self->pdev, 1, 1, - PCI_IRQ_MSI | PCI_IRQ_LEGACY); - if 
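aq_pci_func_init() above keeps the usual PCI DMA negotiation: try a 64-bit streaming and coherent mask first, fall back to 32-bit, and only give up if neither sticks. A sketch of just that fallback, using the same pci_set_dma_mask()/pci_set_consistent_dma_mask() pair:

	#include <linux/dma-mapping.h>
	#include <linux/pci.h>

	static int set_dma_masks(struct pci_dev *pdev)
	{
		int err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));

		if (!err)
			err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (err) {
			err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
			if (!err)
				err = pci_set_consistent_dma_mask(pdev,
								  DMA_BIT_MASK(32));
		}
		return err;	/* the driver maps failure to -ENOSR */
	}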
(err < 0) - goto err_exit; - } -#endif /* AQ_CFG_FORCE_LEGACY_INT */ - - /* net device init */ - for (port = 0; port < self->ports; ++port) { - if (!self->port[port]) - continue; - - err = aq_nic_cfg_start(self->port[port]); - if (err < 0) - goto err_exit; - - err = aq_nic_ndev_init(self->port[port]); - if (err < 0) - goto err_exit; - - err = aq_nic_ndev_register(self->port[port]); - if (err < 0) - goto err_exit; - } + return 0; err_exit: - if (err < 0) - aq_pci_func_deinit(self); return err; } -int aq_pci_func_alloc_irq(struct aq_pci_func_s *self, unsigned int i, +int aq_pci_func_alloc_irq(struct aq_nic_s *self, unsigned int i, char *name, void *aq_vec, cpumask_t *affinity_mask) { struct pci_dev *pdev = self->pdev; @@ -227,11 +158,10 @@ int aq_pci_func_alloc_irq(struct aq_pci_func_s *self, unsigned int i, irq_set_affinity_hint(pci_irq_vector(pdev, i), affinity_mask); } - return err; } -void aq_pci_func_free_irqs(struct aq_pci_func_s *self) +void aq_pci_func_free_irqs(struct aq_nic_s *self) { struct pci_dev *pdev = self->pdev; unsigned int i = 0U; @@ -247,12 +177,7 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self) } } -void __iomem *aq_pci_func_get_mmio(struct aq_pci_func_s *self) -{ - return self->mmio; -} - -unsigned int aq_pci_func_get_irq_type(struct aq_pci_func_s *self) +unsigned int aq_pci_func_get_irq_type(struct aq_nic_s *self) { if (self->pdev->msix_enabled) return AQ_HW_IRQ_MSIX; @@ -261,115 +186,150 @@ unsigned int aq_pci_func_get_irq_type(struct aq_pci_func_s *self) return AQ_HW_IRQ_LEGACY; } -void aq_pci_func_deinit(struct aq_pci_func_s *self) +static void aq_pci_free_irq_vectors(struct aq_nic_s *self) { - if (!self) - goto err_exit; - - aq_pci_func_free_irqs(self); pci_free_irq_vectors(self->pdev); - - if (self->is_regions) - pci_release_regions(self->pdev); - - if (self->is_pci_enabled) - pci_disable_device(self->pdev); - -err_exit:; } -void aq_pci_func_free(struct aq_pci_func_s *self) +static int aq_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) { - unsigned int port = 0U; + struct aq_nic_s *self = NULL; + int err = 0; + struct net_device *ndev; + resource_size_t mmio_pa; + u32 bar; + u32 numvecs; - if (!self) - goto err_exit; + err = pci_enable_device(pdev); + if (err) + return err; - for (port = 0; port < self->ports; ++port) { - if (!self->port[port]) - continue; + err = aq_pci_func_init(pdev); + if (err) + goto err_pci_func; - aq_nic_ndev_free(self->port[port]); + ndev = aq_ndev_alloc(); + if (!ndev) { + err = -ENOMEM; + goto err_ndev; } - if (self->mmio) - iounmap(self->mmio); - - kfree(self); + self = netdev_priv(ndev); + self->pdev = pdev; + SET_NETDEV_DEV(ndev, &pdev->dev); + pci_set_drvdata(pdev, self); -err_exit:; -} + err = aq_pci_probe_get_hw_by_id(pdev, &self->aq_hw_ops, + &aq_nic_get_cfg(self)->aq_hw_caps); + if (err) + goto err_ioremap; -int aq_pci_func_change_pm_state(struct aq_pci_func_s *self, - pm_message_t *pm_msg) -{ - int err = 0; - unsigned int port = 0U; + self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL); + self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self); - if (!self) { - err = -EFAULT; - goto err_exit; - } - for (port = 0; port < self->ports; ++port) { - if (!self->port[port]) - continue; + for (bar = 0; bar < 4; ++bar) { + if (IORESOURCE_MEM & pci_resource_flags(pdev, bar)) { + resource_size_t reg_sz; - (void)aq_nic_change_pm_state(self->port[port], pm_msg); - } + mmio_pa = pci_resource_start(pdev, bar); + if (mmio_pa == 0U) { + err = -EIO; + goto err_ioremap; + } -err_exit: - return err; -} + reg_sz = 
pci_resource_len(pdev, bar); + if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) { + err = -EIO; + goto err_ioremap; + } -static int aq_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *pci_id) -{ - const struct aq_hw_ops *aq_hw_ops = NULL; - struct aq_pci_func_s *aq_pci_func = NULL; - int err = 0; + self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz); + if (!self->aq_hw->mmio) { + err = -EIO; + goto err_ioremap; + } + break; + } + } - err = pci_enable_device(pdev); - if (err < 0) - goto err_exit; - aq_hw_ops = aq_pci_probe_get_hw_ops_by_id(pdev); - aq_pci_func = aq_pci_func_alloc(aq_hw_ops, pdev); - if (!aq_pci_func) { - err = -ENOMEM; - goto err_exit; + if (bar == 4) { + err = -EIO; + goto err_ioremap; } - err = aq_pci_func_init(aq_pci_func); - if (err < 0) - goto err_exit; -err_exit: + numvecs = min((u8)AQ_CFG_VECS_DEF, + aq_nic_get_cfg(self)->aq_hw_caps->msix_irqs); + numvecs = min(numvecs, num_online_cpus()); + /*enable interrupts */ +#if !AQ_CFG_FORCE_LEGACY_INT + err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs, + PCI_IRQ_MSIX); + if (err < 0) { - if (aq_pci_func) - aq_pci_func_free(aq_pci_func); + err = pci_alloc_irq_vectors(self->pdev, 1, 1, + PCI_IRQ_MSI | PCI_IRQ_LEGACY); + if (err < 0) + goto err_hwinit; } +#endif + + /* net device init */ + aq_nic_cfg_start(self); + + aq_nic_ndev_init(self); + + err = aq_nic_ndev_register(self); + if (err < 0) + goto err_register; + + return 0; + +err_register: + aq_nic_free_vectors(self); + aq_pci_free_irq_vectors(self); +err_hwinit: + iounmap(self->aq_hw->mmio); +err_ioremap: + free_netdev(ndev); +err_pci_func: + pci_release_regions(pdev); +err_ndev: + pci_disable_device(pdev); return err; } static void aq_pci_remove(struct pci_dev *pdev) { - struct aq_pci_func_s *aq_pci_func = pci_get_drvdata(pdev); + struct aq_nic_s *self = pci_get_drvdata(pdev); + + if (self->ndev) { + if (self->ndev->reg_state == NETREG_REGISTERED) + unregister_netdev(self->ndev); + aq_nic_free_vectors(self); + aq_pci_free_irq_vectors(self); + iounmap(self->aq_hw->mmio); + kfree(self->aq_hw); + pci_release_regions(pdev); + free_netdev(self->ndev); + } - aq_pci_func_deinit(aq_pci_func); - aq_pci_func_free(aq_pci_func); + pci_disable_device(pdev); } static int aq_pci_suspend(struct pci_dev *pdev, pm_message_t pm_msg) { - struct aq_pci_func_s *aq_pci_func = pci_get_drvdata(pdev); + struct aq_nic_s *self = pci_get_drvdata(pdev); - return aq_pci_func_change_pm_state(aq_pci_func, &pm_msg); + return aq_nic_change_pm_state(self, &pm_msg); } static int aq_pci_resume(struct pci_dev *pdev) { - struct aq_pci_func_s *aq_pci_func = pci_get_drvdata(pdev); + struct aq_nic_s *self = pci_get_drvdata(pdev); pm_message_t pm_msg = PMSG_RESTORE; - return aq_pci_func_change_pm_state(aq_pci_func, &pm_msg); + return aq_nic_change_pm_state(self, &pm_msg); } static struct pci_driver aq_pci_ops = { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h index 5f100ea1b0d6..aeee67bf69fa 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.h @@ -15,19 +15,18 @@ #include "aq_common.h" #include "aq_nic.h" -struct aq_pci_func_s *aq_pci_func_alloc(const struct aq_hw_ops *hw_ops, - struct pci_dev *pdev); -int aq_pci_func_init(struct aq_pci_func_s *self); -int aq_pci_func_alloc_irq(struct aq_pci_func_s *self, unsigned int i, +struct aq_board_revision_s { + unsigned short devid; + unsigned short revision; + const struct aq_hw_ops *ops; + const struct aq_hw_caps_s 
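The hw_atl_a0 capability tables below share their boilerplate through a DEFAULT_A0_BOARD_BASIC_CAPABILITIES macro that expands into designated initializers, so each const caps table spells out only media_type and link_speed_msk. A minimal sketch of the macro-plus-designated-initializer pattern (fields simplified):

	struct caps {
		unsigned int mtu;
		unsigned int media;	/* 1 = TP, 2 = fibre */
		unsigned int rates;	/* bitmask of supported speeds */
	};

	#define DEFAULT_CAPS \
		.mtu = 9014U	/* shared fields live in one place */

	static const struct caps caps_tp = {
		DEFAULT_CAPS,
		.media = 1,
		.rates = 0x1FU,
	};

	static const struct caps caps_fibre = {
		DEFAULT_CAPS,
		.media = 2,
		.rates = 0x0FU,
	};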
*caps; +}; + +int aq_pci_func_init(struct pci_dev *pdev); +int aq_pci_func_alloc_irq(struct aq_nic_s *self, unsigned int i, char *name, void *aq_vec, cpumask_t *affinity_mask); -void aq_pci_func_free_irqs(struct aq_pci_func_s *self); -int aq_pci_func_start(struct aq_pci_func_s *self); -void __iomem *aq_pci_func_get_mmio(struct aq_pci_func_s *self); -unsigned int aq_pci_func_get_irq_type(struct aq_pci_func_s *self); -void aq_pci_func_deinit(struct aq_pci_func_s *self); -void aq_pci_func_free(struct aq_pci_func_s *self); -int aq_pci_func_change_pm_state(struct aq_pci_func_s *self, - pm_message_t *pm_msg); +void aq_pci_func_free_irqs(struct aq_nic_s *self); +unsigned int aq_pci_func_get_irq_type(struct aq_nic_s *self); #endif /* AQ_PCI_FUNC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 4a1c1b96b8b6..67e2f9fb9402 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -18,45 +18,67 @@ #include "hw_atl_llh.h" #include "hw_atl_a0_internal.h" -static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self, - struct aq_hw_caps_s *aq_hw_caps, - unsigned short device, - unsigned short subsystem_device) -{ - memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps)); - - if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001) - aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G; - - if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) { - aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G; - aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_5G; - } - - return 0; -} - -static struct aq_hw_s *hw_atl_a0_create(struct aq_pci_func_s *aq_pci_func, - unsigned int port) -{ - struct aq_hw_s *self = NULL; - - self = kzalloc(sizeof(*self), GFP_KERNEL); - if (!self) - goto err_exit; - - self->aq_pci_func = aq_pci_func; +#define DEFAULT_A0_BOARD_BASIC_CAPABILITIES \ + .is_64_dma = true, \ + .msix_irqs = 4U, \ + .irq_mask = ~0U, \ + .vecs = HW_ATL_A0_RSS_MAX, \ + .tcs = HW_ATL_A0_TC_MAX, \ + .rxd_alignment = 1U, \ + .rxd_size = HW_ATL_A0_RXD_SIZE, \ + .rxds = 248U, \ + .txd_alignment = 1U, \ + .txd_size = HW_ATL_A0_TXD_SIZE, \ + .txds = 8U * 1024U, \ + .txhwb_alignment = 4096U, \ + .tx_rings = HW_ATL_A0_TX_RINGS, \ + .rx_rings = HW_ATL_A0_RX_RINGS, \ + .hw_features = NETIF_F_HW_CSUM | \ + NETIF_F_RXHASH | \ + NETIF_F_RXCSUM | \ + NETIF_F_SG | \ + NETIF_F_TSO, \ + .hw_priv_flags = IFF_UNICAST_FLT, \ + .flow_control = true, \ + .mtu = HW_ATL_A0_MTU_JUMBO, \ + .mac_regs_count = 88, \ + .hw_alive_check_addr = 0x10U + +const struct aq_hw_caps_s hw_atl_a0_caps_aqc100 = { + DEFAULT_A0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_FIBRE, + .link_speed_msk = HW_ATL_A0_RATE_5G | + HW_ATL_A0_RATE_2G5 | + HW_ATL_A0_RATE_1G | + HW_ATL_A0_RATE_100M, +}; - self->not_ff_addr = 0x10U; +const struct aq_hw_caps_s hw_atl_a0_caps_aqc107 = { + DEFAULT_A0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = HW_ATL_A0_RATE_10G | + HW_ATL_A0_RATE_5G | + HW_ATL_A0_RATE_2G5 | + HW_ATL_A0_RATE_1G | + HW_ATL_A0_RATE_100M, +}; -err_exit: - return self; -} +const struct aq_hw_caps_s hw_atl_a0_caps_aqc108 = { + DEFAULT_A0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = HW_ATL_A0_RATE_5G | + HW_ATL_A0_RATE_2G5 | + HW_ATL_A0_RATE_1G | + HW_ATL_A0_RATE_100M, +}; -static void hw_atl_a0_destroy(struct aq_hw_s *self) -{ - kfree(self); -} +const struct aq_hw_caps_s hw_atl_a0_caps_aqc109 = 
{ + DEFAULT_A0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = HW_ATL_A0_RATE_2G5 | + HW_ATL_A0_RATE_1G | + HW_ATL_A0_RATE_100M, +}; static int hw_atl_a0_hw_reset(struct aq_hw_s *self) { @@ -83,7 +105,7 @@ static int hw_atl_a0_hw_reset(struct aq_hw_s *self) if (err < 0) goto err_exit; - hw_atl_utils_mpi_set(self, MPI_RESET, 0x0U); + self->aq_fw_ops->set_state(self, MPI_RESET); err = aq_hw_err_from_flags(self); @@ -332,7 +354,8 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self, u8 *mac_addr) hw_atl_a0_hw_mac_addr_set(self, mac_addr); - hw_atl_utils_mpi_set(self, MPI_INIT, aq_nic_cfg->link_speed_msk); + self->aq_fw_ops->set_link_speed(self, aq_nic_cfg->link_speed_msk); + self->aq_fw_ops->set_state(self, MPI_INIT); hw_atl_reg_tx_dma_debug_ctl_set(self, 0x800000b8U); hw_atl_reg_tx_dma_debug_ctl_set(self, 0x000000b8U); @@ -343,7 +366,7 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self, u8 *mac_addr) /* Reset link status and read out initial hardware counters */ self->aq_link_status.mbps = 0; - hw_atl_utils_update_stats(self); + self->aq_fw_ops->update_stats(self); err = aq_hw_err_from_flags(self); if (err < 0) @@ -849,27 +872,8 @@ static int hw_atl_a0_hw_ring_rx_stop(struct aq_hw_s *self, return aq_hw_err_from_flags(self); } -static int hw_atl_a0_hw_set_speed(struct aq_hw_s *self, u32 speed) -{ - int err = 0; - - err = hw_atl_utils_mpi_set_speed(self, speed, MPI_INIT); - if (err < 0) - goto err_exit; - -err_exit: - return err; -} - -static const struct aq_hw_ops hw_atl_ops_ = { - .create = hw_atl_a0_create, - .destroy = hw_atl_a0_destroy, - .get_hw_caps = hw_atl_a0_get_hw_caps, - - .hw_get_mac_permanent = hw_atl_utils_get_mac_permanent, +const struct aq_hw_ops hw_atl_ops_a0 = { .hw_set_mac_address = hw_atl_a0_hw_mac_addr_set, - .hw_get_link_status = hw_atl_utils_mpi_get_link_status, - .hw_set_link_speed = hw_atl_a0_hw_set_speed, .hw_init = hw_atl_a0_hw_init, .hw_deinit = hw_atl_utils_hw_deinit, .hw_set_power = hw_atl_utils_hw_set_power, @@ -899,21 +903,6 @@ static const struct aq_hw_ops hw_atl_ops_ = { .hw_rss_set = hw_atl_a0_hw_rss_set, .hw_rss_hash_set = hw_atl_a0_hw_rss_hash_set, .hw_get_regs = hw_atl_utils_hw_get_regs, - .hw_update_stats = hw_atl_utils_update_stats, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, }; - -const struct aq_hw_ops *hw_atl_a0_get_ops_by_id(struct pci_dev *pdev) -{ - bool is_vid_ok = (pdev->vendor == PCI_VENDOR_ID_AQUANTIA); - bool is_did_ok = ((pdev->device == HW_ATL_DEVICE_ID_0001) || - (pdev->device == HW_ATL_DEVICE_ID_D100) || - (pdev->device == HW_ATL_DEVICE_ID_D107) || - (pdev->device == HW_ATL_DEVICE_ID_D108) || - (pdev->device == HW_ATL_DEVICE_ID_D109)); - - bool is_rev_ok = (pdev->revision == 1U); - - return (is_vid_ok && is_did_ok && is_rev_ok) ? 
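/*
 * Editor's note (not part of the patch): the per-board capability tables
 * above share their common fields through a macro expanded inside a C99
 * designated initializer; only the media type and the link-speed mask
 * differ per board.  The pattern in isolation, with a hypothetical
 * "caps" type:
 */
#define DEFAULT_CAPS \
	.irq_mask = ~0U, \
	.rx_rings = 4U

struct caps {
	unsigned int irq_mask;
	unsigned int rx_rings;
	unsigned int link_speed_msk;
};

static const struct caps caps_fibre = {
	DEFAULT_CAPS,			/* shared board basics */
	.link_speed_msk = 0x1fU,	/* the only per-board field here */
};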
&hw_atl_ops_ : NULL; -} diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h index 4fdd51b67097..25fe954def03 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.h @@ -16,19 +16,11 @@ #include "../aq_common.h" -#ifndef PCI_VENDOR_ID_AQUANTIA +extern const struct aq_hw_caps_s hw_atl_a0_caps_aqc100; +extern const struct aq_hw_caps_s hw_atl_a0_caps_aqc107; +extern const struct aq_hw_caps_s hw_atl_a0_caps_aqc108; +extern const struct aq_hw_caps_s hw_atl_a0_caps_aqc109; -#define PCI_VENDOR_ID_AQUANTIA 0x1D6A -#define HW_ATL_DEVICE_ID_0001 0x0001 -#define HW_ATL_DEVICE_ID_D100 0xD100 -#define HW_ATL_DEVICE_ID_D107 0xD107 -#define HW_ATL_DEVICE_ID_D108 0xD108 -#define HW_ATL_DEVICE_ID_D109 0xD109 - -#define HW_ATL_NIC_NAME "aQuantia AQtion 5Gbit Network Adapter" - -#endif - -const struct aq_hw_ops *hw_atl_a0_get_ops_by_id(struct pci_dev *pdev); +extern const struct aq_hw_ops hw_atl_ops_a0; #endif /* HW_ATL_A0_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h index 7a71330252bd..1d8855558d74 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h @@ -88,38 +88,4 @@ #define HW_ATL_A0_FW_VER_EXPECTED 0x01050006U -/* HW layer capabilities */ -static struct aq_hw_caps_s hw_atl_a0_hw_caps_ = { - .ports = 1U, - .is_64_dma = true, - .msix_irqs = 4U, - .irq_mask = ~0U, - .vecs = HW_ATL_A0_RSS_MAX, - .tcs = HW_ATL_A0_TC_MAX, - .rxd_alignment = 1U, - .rxd_size = HW_ATL_A0_RXD_SIZE, - .rxds = 248U, - .txd_alignment = 1U, - .txd_size = HW_ATL_A0_TXD_SIZE, - .txds = 8U * 1024U, - .txhwb_alignment = 4096U, - .tx_rings = HW_ATL_A0_TX_RINGS, - .rx_rings = HW_ATL_A0_RX_RINGS, - .hw_features = NETIF_F_HW_CSUM | - NETIF_F_RXCSUM | - NETIF_F_RXHASH | - NETIF_F_SG | - NETIF_F_TSO, - .hw_priv_flags = IFF_UNICAST_FLT, - .link_speed_msk = (HW_ATL_A0_RATE_10G | - HW_ATL_A0_RATE_5G | - HW_ATL_A0_RATE_2G5 | - HW_ATL_A0_RATE_1G | - HW_ATL_A0_RATE_100M), - .flow_control = true, - .mtu = HW_ATL_A0_MTU_JUMBO, - .mac_regs_count = 88, - .fw_ver_expected = HW_ATL_A0_FW_VER_EXPECTED, -}; - #endif /* HW_ATL_A0_INTERNAL_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 0b090161ed79..819f6bcf9b4e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -19,76 +19,82 @@ #include "hw_atl_b0_internal.h" #include "hw_atl_llh_internal.h" -static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self, - struct aq_hw_caps_s *aq_hw_caps, - unsigned short device, - unsigned short subsystem_device) -{ - memcpy(aq_hw_caps, &hw_atl_b0_hw_caps_, sizeof(*aq_hw_caps)); - - if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001) - aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G; - - if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) { - aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G; - aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_5G; - } - - return 0; -} - -static struct aq_hw_s *hw_atl_b0_create(struct aq_pci_func_s *aq_pci_func, - unsigned int port) -{ - struct aq_hw_s *self = NULL; - - self = kzalloc(sizeof(*self), GFP_KERNEL); - if (!self) - goto err_exit; - - self->aq_pci_func = aq_pci_func; 
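/*
 * Editor's note (not part of the patch): with hw_atl_a0_get_ops_by_id()
 * gone, struct aq_board_revision_s from the header hunk above suggests a
 * flat table keyed by PCI device id and chip revision; in the patch this
 * lookup lives in aq_pci_probe_get_hw_by_id().  A sketch of such a table
 * (board_lookup() is hypothetical; the id and revisions are the ones the
 * removed probes checked -- 0xD107, revision 1 for A0 and 2 for B0):
 */
static const struct aq_board_revision_s boards[] = {
	{ 0xd107, 1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc107 },
	{ 0xd107, 2, &hw_atl_ops_b0, &hw_atl_b0_caps_aqc107 },
};

static const struct aq_board_revision_s *board_lookup(unsigned short devid,
						      unsigned short rev)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(boards); i++)
		if (boards[i].devid == devid && boards[i].revision == rev)
			return &boards[i];
	return NULL;	/* unknown board: probe should fail */
}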
+#define DEFAULT_B0_BOARD_BASIC_CAPABILITIES \ + .is_64_dma = true, \ + .msix_irqs = 4U, \ + .irq_mask = ~0U, \ + .vecs = HW_ATL_B0_RSS_MAX, \ + .tcs = HW_ATL_B0_TC_MAX, \ + .rxd_alignment = 1U, \ + .rxd_size = HW_ATL_B0_RXD_SIZE, \ + .rxds = 4U * 1024U, \ + .txd_alignment = 1U, \ + .txd_size = HW_ATL_B0_TXD_SIZE, \ + .txds = 8U * 1024U, \ + .txhwb_alignment = 4096U, \ + .tx_rings = HW_ATL_B0_TX_RINGS, \ + .rx_rings = HW_ATL_B0_RX_RINGS, \ + .hw_features = NETIF_F_HW_CSUM | \ + NETIF_F_RXCSUM | \ + NETIF_F_RXHASH | \ + NETIF_F_SG | \ + NETIF_F_TSO | \ + NETIF_F_LRO, \ + .hw_priv_flags = IFF_UNICAST_FLT, \ + .flow_control = true, \ + .mtu = HW_ATL_B0_MTU_JUMBO, \ + .mac_regs_count = 88, \ + .hw_alive_check_addr = 0x10U + +const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = { + DEFAULT_B0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_FIBRE, + .link_speed_msk = HW_ATL_B0_RATE_10G | + HW_ATL_B0_RATE_5G | + HW_ATL_B0_RATE_2G5 | + HW_ATL_B0_RATE_1G | + HW_ATL_B0_RATE_100M, +}; - self->not_ff_addr = 0x10U; +const struct aq_hw_caps_s hw_atl_b0_caps_aqc107 = { + DEFAULT_B0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = HW_ATL_B0_RATE_10G | + HW_ATL_B0_RATE_5G | + HW_ATL_B0_RATE_2G5 | + HW_ATL_B0_RATE_1G | + HW_ATL_B0_RATE_100M, +}; -err_exit: - return self; -} +const struct aq_hw_caps_s hw_atl_b0_caps_aqc108 = { + DEFAULT_B0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = HW_ATL_B0_RATE_5G | + HW_ATL_B0_RATE_2G5 | + HW_ATL_B0_RATE_1G | + HW_ATL_B0_RATE_100M, +}; -static void hw_atl_b0_destroy(struct aq_hw_s *self) -{ - kfree(self); -} +const struct aq_hw_caps_s hw_atl_b0_caps_aqc109 = { + DEFAULT_B0_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = HW_ATL_B0_RATE_2G5 | + HW_ATL_B0_RATE_1G | + HW_ATL_B0_RATE_100M, +}; static int hw_atl_b0_hw_reset(struct aq_hw_s *self) { int err = 0; - hw_atl_glb_glb_reg_res_dis_set(self, 1U); - hw_atl_pci_pci_reg_res_dis_set(self, 0U); - hw_atl_rx_rx_reg_res_dis_set(self, 0U); - hw_atl_tx_tx_reg_res_dis_set(self, 0U); - - HW_ATL_FLUSH(); - hw_atl_glb_soft_res_set(self, 1); + err = hw_atl_utils_soft_reset(self); + if (err) + return err; - /* check 10 times by 1ms */ - AQ_HW_WAIT_FOR(hw_atl_glb_soft_res_get(self) == 0, 1000U, 10U); - if (err < 0) - goto err_exit; - - hw_atl_itr_irq_reg_res_dis_set(self, 0U); - hw_atl_itr_res_irq_set(self, 1U); - - /* check 10 times by 1ms */ - AQ_HW_WAIT_FOR(hw_atl_itr_res_irq_get(self) == 0, 1000U, 10U); - if (err < 0) - goto err_exit; - - hw_atl_utils_mpi_set(self, MPI_RESET, 0x0U); + self->aq_fw_ops->set_state(self, MPI_RESET); err = aq_hw_err_from_flags(self); -err_exit: return err; } @@ -379,7 +385,8 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr) hw_atl_b0_hw_mac_addr_set(self, mac_addr); - hw_atl_utils_mpi_set(self, MPI_INIT, aq_nic_cfg->link_speed_msk); + self->aq_fw_ops->set_link_speed(self, aq_nic_cfg->link_speed_msk); + self->aq_fw_ops->set_state(self, MPI_INIT); hw_atl_b0_hw_qos_set(self); hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss); @@ -398,7 +405,7 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr) /* Reset link status and read out initial hardware counters */ self->aq_link_status.mbps = 0; - hw_atl_utils_update_stats(self); + self->aq_fw_ops->update_stats(self); err = aq_hw_err_from_flags(self); if (err < 0) @@ -923,27 +930,8 @@ static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, return aq_hw_err_from_flags(self); } -static int hw_atl_b0_hw_set_speed(struct 
aq_hw_s *self, u32 speed) -{ - int err = 0; - - err = hw_atl_utils_mpi_set_speed(self, speed, MPI_INIT); - if (err < 0) - goto err_exit; - -err_exit: - return err; -} - -static const struct aq_hw_ops hw_atl_ops_ = { - .create = hw_atl_b0_create, - .destroy = hw_atl_b0_destroy, - .get_hw_caps = hw_atl_b0_get_hw_caps, - - .hw_get_mac_permanent = hw_atl_utils_get_mac_permanent, +const struct aq_hw_ops hw_atl_ops_b0 = { .hw_set_mac_address = hw_atl_b0_hw_mac_addr_set, - .hw_get_link_status = hw_atl_utils_mpi_get_link_status, - .hw_set_link_speed = hw_atl_b0_hw_set_speed, .hw_init = hw_atl_b0_hw_init, .hw_deinit = hw_atl_utils_hw_deinit, .hw_set_power = hw_atl_utils_hw_set_power, @@ -973,21 +961,6 @@ static const struct aq_hw_ops hw_atl_ops_ = { .hw_rss_set = hw_atl_b0_hw_rss_set, .hw_rss_hash_set = hw_atl_b0_hw_rss_hash_set, .hw_get_regs = hw_atl_utils_hw_get_regs, - .hw_update_stats = hw_atl_utils_update_stats, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, }; - -const struct aq_hw_ops *hw_atl_b0_get_ops_by_id(struct pci_dev *pdev) -{ - bool is_vid_ok = (pdev->vendor == PCI_VENDOR_ID_AQUANTIA); - bool is_did_ok = ((pdev->device == HW_ATL_DEVICE_ID_0001) || - (pdev->device == HW_ATL_DEVICE_ID_D100) || - (pdev->device == HW_ATL_DEVICE_ID_D107) || - (pdev->device == HW_ATL_DEVICE_ID_D108) || - (pdev->device == HW_ATL_DEVICE_ID_D109)); - - bool is_rev_ok = (pdev->revision == 2U); - - return (is_vid_ok && is_did_ok && is_rev_ok) ? &hw_atl_ops_ : NULL; -} diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h index 3e10969c1df5..2cc8dacfdc27 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h @@ -16,19 +16,27 @@ #include "../aq_common.h" -#ifndef PCI_VENDOR_ID_AQUANTIA +extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc100; +extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc107; +extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc108; +extern const struct aq_hw_caps_s hw_atl_b0_caps_aqc109; -#define PCI_VENDOR_ID_AQUANTIA 0x1D6A -#define HW_ATL_DEVICE_ID_0001 0x0001 -#define HW_ATL_DEVICE_ID_D100 0xD100 -#define HW_ATL_DEVICE_ID_D107 0xD107 -#define HW_ATL_DEVICE_ID_D108 0xD108 -#define HW_ATL_DEVICE_ID_D109 0xD109 +#define hw_atl_b0_caps_aqc111 hw_atl_b0_caps_aqc108 +#define hw_atl_b0_caps_aqc112 hw_atl_b0_caps_aqc109 -#define HW_ATL_NIC_NAME "aQuantia AQtion 5Gbit Network Adapter" +#define hw_atl_b0_caps_aqc100s hw_atl_b0_caps_aqc100 +#define hw_atl_b0_caps_aqc107s hw_atl_b0_caps_aqc107 +#define hw_atl_b0_caps_aqc108s hw_atl_b0_caps_aqc108 +#define hw_atl_b0_caps_aqc109s hw_atl_b0_caps_aqc109 -#endif +#define hw_atl_b0_caps_aqc111s hw_atl_b0_caps_aqc108 +#define hw_atl_b0_caps_aqc112s hw_atl_b0_caps_aqc109 -const struct aq_hw_ops *hw_atl_b0_get_ops_by_id(struct pci_dev *pdev); +#define hw_atl_b0_caps_aqc111e hw_atl_b0_caps_aqc108 +#define hw_atl_b0_caps_aqc112e hw_atl_b0_caps_aqc109 + +extern const struct aq_hw_ops hw_atl_ops_b0; + +#define hw_atl_ops_b1 hw_atl_ops_b0 #endif /* HW_ATL_B0_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index 740ff73c6d67..405d1455c222 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -143,38 +143,5 @@ #define HW_ATL_INTR_MODER_MIN 0xFF /* HW layer 
capabilities */ -static struct aq_hw_caps_s hw_atl_b0_hw_caps_ = { - .ports = 1U, - .is_64_dma = true, - .msix_irqs = 4U, - .irq_mask = ~0U, - .vecs = HW_ATL_B0_RSS_MAX, - .tcs = HW_ATL_B0_TC_MAX, - .rxd_alignment = 1U, - .rxd_size = HW_ATL_B0_RXD_SIZE, - .rxds = 8U * 1024U, - .txd_alignment = 1U, - .txd_size = HW_ATL_B0_TXD_SIZE, - .txds = 8U * 1024U, - .txhwb_alignment = 4096U, - .tx_rings = HW_ATL_B0_TX_RINGS, - .rx_rings = HW_ATL_B0_RX_RINGS, - .hw_features = NETIF_F_HW_CSUM | - NETIF_F_RXCSUM | - NETIF_F_RXHASH | - NETIF_F_SG | - NETIF_F_TSO | - NETIF_F_LRO, - .hw_priv_flags = IFF_UNICAST_FLT, - .link_speed_msk = (HW_ATL_B0_RATE_10G | - HW_ATL_B0_RATE_5G | - HW_ATL_B0_RATE_2G5 | - HW_ATL_B0_RATE_1G | - HW_ATL_B0_RATE_100M), - .flow_control = true, - .mtu = HW_ATL_B0_MTU_JUMBO, - .mac_regs_count = 88, - .fw_ver_expected = HW_ATL_B0_FW_VER_EXPECTED, -}; #endif /* HW_ATL_B0_INTERNAL_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 9c7e9161b4db..967f0fd07fcf 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -13,25 +13,230 @@ #include "../aq_nic.h" #include "../aq_hw_utils.h" -#include "../aq_pci_func.h" #include "hw_atl_utils.h" #include "hw_atl_llh.h" +#include "hw_atl_llh_internal.h" #include <linux/random.h> #define HW_ATL_UCP_0X370_REG 0x0370U #define HW_ATL_FW_SM_RAM 0x2U +#define HW_ATL_MPI_FW_VERSION 0x18 #define HW_ATL_MPI_CONTROL_ADR 0x0368U #define HW_ATL_MPI_STATE_ADR 0x036CU #define HW_ATL_MPI_STATE_MSK 0x00FFU #define HW_ATL_MPI_STATE_SHIFT 0U -#define HW_ATL_MPI_SPEED_MSK 0xFFFFU +#define HW_ATL_MPI_SPEED_MSK 0xFFFF0000U #define HW_ATL_MPI_SPEED_SHIFT 16U -static int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a, - u32 *p, u32 cnt) +#define HW_ATL_MPI_DAISY_CHAIN_STATUS 0x704 +#define HW_ATL_MPI_BOOT_EXIT_CODE 0x388 + +#define HW_ATL_MAC_PHY_CONTROL 0x4000 +#define HW_ATL_MAC_PHY_MPI_RESET_BIT 0x1D + +#define HW_ATL_FW_VER_1X 0x01050006U +#define HW_ATL_FW_VER_2X 0x02000000U +#define HW_ATL_FW_VER_3X 0x03000000U + +#define FORCE_FLASHLESS 0 + +static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); + +int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) +{ + int err = 0; + + err = hw_atl_utils_soft_reset(self); + if (err) + return err; + + hw_atl_utils_hw_chip_features_init(self, + &self->chip_features); + + hw_atl_utils_get_fw_version(self, &self->fw_ver_actual); + + if (hw_atl_utils_ver_match(HW_ATL_FW_VER_1X, + self->fw_ver_actual) == 0) { + *fw_ops = &aq_fw_1x_ops; + } else if (hw_atl_utils_ver_match(HW_ATL_FW_VER_2X, + self->fw_ver_actual) == 0) { + *fw_ops = &aq_fw_2x_ops; + } else if (hw_atl_utils_ver_match(HW_ATL_FW_VER_3X, + self->fw_ver_actual) == 0) { + *fw_ops = &aq_fw_2x_ops; + } else { + aq_pr_err("Bad FW version detected: %x\n", + self->fw_ver_actual); + return -EOPNOTSUPP; + } + self->aq_fw_ops = *fw_ops; + err = self->aq_fw_ops->init(self); + return err; +} + +static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self) +{ + int k = 0; + u32 gsr; + + aq_hw_write_reg(self, 0x404, 0x40e1); + AQ_HW_SLEEP(50); + + /* Cleanup SPI */ + aq_hw_write_reg(self, 0x534, 0xA0); + aq_hw_write_reg(self, 0x100, 0x9F); + aq_hw_write_reg(self, 0x100, 0x809F); + + gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR); + aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000); + + /* Kickstart MAC */ + aq_hw_write_reg(self, 
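/*
 * Editor's note (not part of the patch): hw_atl_utils_initfw() above picks
 * aq_fw_1x_ops or aq_fw_2x_ops by matching the running firmware version
 * against the 1.x/2.x/3.x bases (3.x deliberately reuses the 2.x ops).
 * hw_atl_utils_ver_match() is only forward-declared in this hunk; a
 * plausible reading, sketched purely as an assumption, is "same major
 * number, and at least the expected minor/build":
 */
static int ver_match_sketch(u32 expected, u32 actual)
{
	if ((expected ^ actual) & 0xff000000U)
		return -1;			/* major number differs */
	if ((actual & 0x00ffffffU) < (expected & 0x00ffffffU))
		return -1;			/* older than the base */
	return 0;				/* 0 == match, as tested above */
}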
0x404, 0x80e0); + aq_hw_write_reg(self, 0x32a8, 0x0); + aq_hw_write_reg(self, 0x520, 0x1); + AQ_HW_SLEEP(10); + aq_hw_write_reg(self, 0x404, 0x180e0); + + for (k = 0; k < 1000; k++) { + u32 flb_status = aq_hw_read_reg(self, + HW_ATL_MPI_DAISY_CHAIN_STATUS); + + flb_status = flb_status & 0x10; + if (flb_status) + break; + AQ_HW_SLEEP(10); + } + if (k == 1000) { + aq_pr_err("MAC kickstart failed\n"); + return -EIO; + } + + /* FW reset */ + aq_hw_write_reg(self, 0x404, 0x80e0); + AQ_HW_SLEEP(50); + aq_hw_write_reg(self, 0x3a0, 0x1); + + /* Kickstart PHY - skipped */ + + /* Global software reset*/ + hw_atl_rx_rx_reg_res_dis_set(self, 0U); + hw_atl_tx_tx_reg_res_dis_set(self, 0U); + aq_hw_write_reg_bit(self, HW_ATL_MAC_PHY_CONTROL, + BIT(HW_ATL_MAC_PHY_MPI_RESET_BIT), + HW_ATL_MAC_PHY_MPI_RESET_BIT, 0x0); + gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR); + aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000); + + for (k = 0; k < 1000; k++) { + u32 fw_state = aq_hw_read_reg(self, HW_ATL_MPI_FW_VERSION); + + if (fw_state) + break; + AQ_HW_SLEEP(10); + } + if (k == 1000) { + aq_pr_err("FW kickstart failed\n"); + return -EIO; + } + + return 0; +} + +static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self) +{ + u32 gsr, rbl_status; + int k; + + aq_hw_write_reg(self, 0x404, 0x40e1); + aq_hw_write_reg(self, 0x3a0, 0x1); + aq_hw_write_reg(self, 0x32a8, 0x0); + + /* Alter RBL status */ + aq_hw_write_reg(self, 0x388, 0xDEAD); + + /* Global software reset*/ + hw_atl_rx_rx_reg_res_dis_set(self, 0U); + hw_atl_tx_tx_reg_res_dis_set(self, 0U); + aq_hw_write_reg_bit(self, HW_ATL_MAC_PHY_CONTROL, + BIT(HW_ATL_MAC_PHY_MPI_RESET_BIT), + HW_ATL_MAC_PHY_MPI_RESET_BIT, 0x0); + gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR); + aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, + (gsr & 0xFFFFBFFF) | 0x8000); + + if (FORCE_FLASHLESS) + aq_hw_write_reg(self, 0x534, 0x0); + + aq_hw_write_reg(self, 0x404, 0x40e0); + + /* Wait for RBL boot */ + for (k = 0; k < 1000; k++) { + rbl_status = aq_hw_read_reg(self, 0x388) & 0xFFFF; + if (rbl_status && rbl_status != 0xDEAD) + break; + AQ_HW_SLEEP(10); + } + if (!rbl_status || rbl_status == 0xDEAD) { + aq_pr_err("RBL Restart failed"); + return -EIO; + } + + /* Restore NVR */ + if (FORCE_FLASHLESS) + aq_hw_write_reg(self, 0x534, 0xA0); + + if (rbl_status == 0xF1A7) { + aq_pr_err("No FW detected. 
Dynamic FW load not implemented\n"); + return -ENOTSUPP; + } + + for (k = 0; k < 1000; k++) { + u32 fw_state = aq_hw_read_reg(self, HW_ATL_MPI_FW_VERSION); + + if (fw_state) + break; + AQ_HW_SLEEP(10); + } + if (k == 1000) { + aq_pr_err("FW kickstart failed\n"); + return -EIO; + } + + return 0; +} + +int hw_atl_utils_soft_reset(struct aq_hw_s *self) +{ + int k; + u32 boot_exit_code = 0; + + for (k = 0; k < 1000; ++k) { + u32 flb_status = aq_hw_read_reg(self, + HW_ATL_MPI_DAISY_CHAIN_STATUS); + boot_exit_code = aq_hw_read_reg(self, + HW_ATL_MPI_BOOT_EXIT_CODE); + if (flb_status != 0x06000000 || boot_exit_code != 0) + break; + } + + if (k == 1000) { + aq_pr_err("Neither RBL nor FLB firmware started\n"); + return -EOPNOTSUPP; + } + + self->rbl_enabled = (boot_exit_code != 0); + + if (self->rbl_enabled) + return hw_atl_utils_soft_reset_rbl(self); + else + return hw_atl_utils_soft_reset_flb(self); +} + +int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a, + u32 *p, u32 cnt) { int err = 0; @@ -137,14 +342,6 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self, AQ_HW_WAIT_FOR(0U != (self->mbox_addr = aq_hw_read_reg(self, 0x360U)), 1000U, 10U); - err = hw_atl_utils_ver_match(aq_hw_caps->fw_ver_expected, - aq_hw_read_reg(self, 0x18U)); - - if (err < 0) - pr_err("%s: Bad FW version detected: expected=%x, actual=%x\n", - AQ_CFG_DRV_NAME, - aq_hw_caps->fw_ver_expected, - aq_hw_read_reg(self, 0x18U)); return err; } @@ -286,19 +483,19 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, err_exit:; } -int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed, - enum hal_atl_utils_fw_state_e state) +int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed) { - u32 ucp_0x368 = 0; + u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR); - ucp_0x368 = (speed << HW_ATL_MPI_SPEED_SHIFT) | state; - aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, ucp_0x368); + val = (val & HW_ATL_MPI_STATE_MSK) | (speed << HW_ATL_MPI_SPEED_SHIFT); + aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val); return 0; } void hw_atl_utils_mpi_set(struct aq_hw_s *self, - enum hal_atl_utils_fw_state_e state, u32 speed) + enum hal_atl_utils_fw_state_e state, + u32 speed) { int err = 0; u32 transaction_id = 0; @@ -317,11 +514,22 @@ void hw_atl_utils_mpi_set(struct aq_hw_s *self, goto err_exit; } - err = hw_atl_utils_mpi_set_speed(self, speed, state); + aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, + (speed << HW_ATL_MPI_SPEED_SHIFT) | state); err_exit:; } +static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, + enum hal_atl_utils_fw_state_e state) +{ + u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR); + + val = state | (val & HW_ATL_MPI_SPEED_MSK); + aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val); + return 0; +} + int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self) { u32 cp0x036C = aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR); @@ -369,15 +577,6 @@ int hw_atl_utils_get_mac_permanent(struct aq_hw_s *self, u32 l = 0U; u32 mac_addr[2]; - self->mmio = aq_pci_func_get_mmio(self->aq_pci_func); - - hw_atl_utils_hw_chip_features_init(self, - &self->chip_features); - - err = hw_atl_utils_mpi_create(self); - if (err < 0) - goto err_exit; - if (!aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG)) { unsigned int rnd = 0; unsigned int ucp_0x370 = 0; @@ -423,7 +622,6 @@ int hw_atl_utils_get_mac_permanent(struct aq_hw_s *self, mac[0] = (u8)(0xFFU & h); } -err_exit: return err; } @@ -571,7 +769,7 @@ int hw_atl_utils_hw_get_regs(struct aq_hw_s *self, for (i = 0; i < aq_hw_caps->mac_regs_count; i++) regs_buff[i] = 
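/*
 * Editor's note (not part of the patch): both reset paths above wait on
 * hardware with the same bounded-poll shape -- up to 1000 reads, 10 ms
 * apart, then give up with -EIO instead of hanging the probe.  The shape
 * extracted into a hypothetical helper (aq_hw_read_reg()/AQ_HW_SLEEP()
 * are the driver primitives used in the patch):
 */
static int poll_reg_for_bits(struct aq_hw_s *self, u32 addr, u32 mask)
{
	int k;

	for (k = 0; k < 1000; k++) {
		if (aq_hw_read_reg(self, addr) & mask)
			return 0;	/* condition met */
		AQ_HW_SLEEP(10);	/* 10 ms between reads */
	}
	return -EIO;			/* ~10 s without progress */
}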
aq_hw_read_reg(self, - hw_atl_utils_hw_mac_regs[i]); + hw_atl_utils_hw_mac_regs[i]); return 0; } @@ -580,3 +778,13 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version) *fw_version = aq_hw_read_reg(self, 0x18U); return 0; } + +const struct aq_fw_ops aq_fw_1x_ops = { + .init = hw_atl_utils_mpi_create, + .reset = NULL, + .get_mac_permanent = hw_atl_utils_get_mac_permanent, + .set_link_speed = hw_atl_utils_mpi_set_speed, + .set_state = hw_atl_utils_mpi_set_state, + .update_link_status = hw_atl_utils_mpi_get_link_status, + .update_stats = hw_atl_utils_update_stats, +}; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index 40e2319c65d5..2c690947910a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -163,7 +163,7 @@ struct __packed hw_aq_atl_utils_mbox { #define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 0x02000000U #define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \ - self->chip_features) + self->chip_features) enum hal_atl_utils_fw_state_e { MPI_DEINIT = 0, @@ -180,10 +180,73 @@ enum hal_atl_utils_fw_state_e { #define HAL_ATLANTIC_RATE_100M BIT(5) #define HAL_ATLANTIC_RATE_INVALID BIT(6) +enum hw_atl_fw2x_rate { + FW2X_RATE_100M = 0x20, + FW2X_RATE_1G = 0x100, + FW2X_RATE_2G5 = 0x200, + FW2X_RATE_5G = 0x400, + FW2X_RATE_10G = 0x800, +}; + +enum hw_atl_fw2x_caps_lo { + CAPS_LO_10BASET_HD = 0x00, + CAPS_LO_10BASET_FD, + CAPS_LO_100BASETX_HD, + CAPS_LO_100BASET4_HD, + CAPS_LO_100BASET2_HD, + CAPS_LO_100BASETX_FD, + CAPS_LO_100BASET2_FD, + CAPS_LO_1000BASET_HD, + CAPS_LO_1000BASET_FD, + CAPS_LO_2P5GBASET_FD, + CAPS_LO_5GBASET_FD, + CAPS_LO_10GBASET_FD, +}; + +enum hw_atl_fw2x_caps_hi { + CAPS_HI_RESERVED1 = 0x00, + CAPS_HI_10BASET_EEE, + CAPS_HI_RESERVED2, + CAPS_HI_PAUSE, + CAPS_HI_ASYMMETRIC_PAUSE, + CAPS_HI_100BASETX_EEE, + CAPS_HI_RESERVED3, + CAPS_HI_RESERVED4, + CAPS_HI_1000BASET_FD_EEE, + CAPS_HI_2P5GBASET_FD_EEE, + CAPS_HI_5GBASET_FD_EEE, + CAPS_HI_10GBASET_FD_EEE, + CAPS_HI_RESERVED5, + CAPS_HI_RESERVED6, + CAPS_HI_RESERVED7, + CAPS_HI_RESERVED8, + CAPS_HI_RESERVED9, + CAPS_HI_CABLE_DIAG, + CAPS_HI_TEMPERATURE, + CAPS_HI_DOWNSHIFT, + CAPS_HI_PTP_AVB_EN, + CAPS_HI_MEDIA_DETECT, + CAPS_HI_LINK_DROP, + CAPS_HI_SLEEP_PROXY, + CAPS_HI_WOL, + CAPS_HI_MAC_STOP, + CAPS_HI_EXT_LOOPBACK, + CAPS_HI_INT_LOOPBACK, + CAPS_HI_EFUSE_AGENT, + CAPS_HI_WOL_TIMER, + CAPS_HI_STATISTICS, + CAPS_HI_TRANSACTION_ID, +}; + struct aq_hw_s; +struct aq_fw_ops; struct aq_hw_caps_s; struct aq_hw_link_status_s; +int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops); + +int hw_atl_utils_soft_reset(struct aq_hw_s *self); + void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p); int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self, @@ -196,9 +259,6 @@ void hw_atl_utils_mpi_set(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state, u32 speed); -int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed, - enum hal_atl_utils_fw_state_e state); - int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self); int hw_atl_utils_get_mac_permanent(struct aq_hw_s *self, @@ -220,5 +280,10 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version); int hw_atl_utils_update_stats(struct aq_hw_s *self); struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self); +int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a, + u32 *p, u32 cnt); + +extern const struct 
aq_fw_ops aq_fw_1x_ops; +extern const struct aq_fw_ops aq_fw_2x_ops; #endif /* HW_ATL_UTILS_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c new file mode 100644 index 000000000000..8cfce95c82fc --- /dev/null +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -0,0 +1,184 @@ +/* + * aQuantia Corporation Network Driver + * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +/* File hw_atl_utils_fw2x.c: Definition of firmware 2.x functions for + * Atlantic hardware abstraction layer. + */ + +#include "../aq_hw.h" +#include "../aq_hw_utils.h" +#include "../aq_pci_func.h" +#include "../aq_ring.h" +#include "../aq_vec.h" +#include "hw_atl_utils.h" +#include "hw_atl_llh.h" + +#define HW_ATL_FW2X_MPI_EFUSE_ADDR 0x364 +#define HW_ATL_FW2X_MPI_MBOX_ADDR 0x360 + +#define HW_ATL_FW2X_MPI_CONTROL_ADDR 0x368 +#define HW_ATL_FW2X_MPI_CONTROL2_ADDR 0x36C + +#define HW_ATL_FW2X_MPI_STATE_ADDR 0x370 +#define HW_ATL_FW2X_MPI_STATE2_ADDR 0x374 + +static int aq_fw2x_init(struct aq_hw_s *self) +{ + int err = 0; + + /* check 10 times by 1ms */ + AQ_HW_WAIT_FOR(0U != (self->mbox_addr = + aq_hw_read_reg(self, HW_ATL_FW2X_MPI_MBOX_ADDR)), + 1000U, 10U); + return err; +} + +static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed) +{ + enum hw_atl_fw2x_rate rate = 0; + + if (speed & AQ_NIC_RATE_10G) + rate |= FW2X_RATE_10G; + + if (speed & AQ_NIC_RATE_5G) + rate |= FW2X_RATE_5G; + + if (speed & AQ_NIC_RATE_5GSR) + rate |= FW2X_RATE_5G; + + if (speed & AQ_NIC_RATE_2GS) + rate |= FW2X_RATE_2G5; + + if (speed & AQ_NIC_RATE_1G) + rate |= FW2X_RATE_1G; + + if (speed & AQ_NIC_RATE_100M) + rate |= FW2X_RATE_100M; + + return rate; +} + +static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed) +{ + u32 val = link_speed_mask_2fw2x_ratemask(speed); + + aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR, val); + + return 0; +} + +static int aq_fw2x_set_state(struct aq_hw_s *self, + enum hal_atl_utils_fw_state_e state) +{ + /* No explicit state in 2x fw */ + return 0; +} + +static int aq_fw2x_update_link_status(struct aq_hw_s *self) +{ + u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR); + u32 speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G | + FW2X_RATE_2G5 | FW2X_RATE_5G | FW2X_RATE_10G); + struct aq_hw_link_status_s *link_status = &self->aq_link_status; + + if (speed) { + if (speed & FW2X_RATE_10G) + link_status->mbps = 10000; + else if (speed & FW2X_RATE_5G) + link_status->mbps = 5000; + else if (speed & FW2X_RATE_2G5) + link_status->mbps = 2500; + else if (speed & FW2X_RATE_1G) + link_status->mbps = 1000; + else if (speed & FW2X_RATE_100M) + link_status->mbps = 100; + else + link_status->mbps = 10000; + } else { + link_status->mbps = 0; + } + + return 0; +} + +int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac) +{ + int err = 0; + u32 h = 0U; + u32 l = 0U; + u32 mac_addr[2] = { 0 }; + u32 efuse_addr = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_EFUSE_ADDR); + + if (efuse_addr != 0) { + err = hw_atl_utils_fw_downld_dwords(self, + efuse_addr + (40U * 4U), + mac_addr, + ARRAY_SIZE(mac_addr)); + if (err) + return err; + mac_addr[0] = __swab32(mac_addr[0]); + mac_addr[1] = __swab32(mac_addr[1]); + } + + ether_addr_copy(mac, (u8 
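/*
 * Editor's note (not part of the patch): aq_fw2x_update_link_status()
 * above decodes the MPI state register by testing rate bits from fastest
 * to slowest.  The same decode as a standalone table walk; the bit values
 * are the FW2X_RATE_* constants from the header hunk earlier in this
 * patch, and fw2x_speed_to_mbps() is a hypothetical name:
 */
static unsigned int fw2x_speed_to_mbps(u32 mpi_state)
{
	static const struct { u32 bit; unsigned int mbps; } rates[] = {
		{ 0x800, 10000 },	/* FW2X_RATE_10G  */
		{ 0x400,  5000 },	/* FW2X_RATE_5G   */
		{ 0x200,  2500 },	/* FW2X_RATE_2G5  */
		{ 0x100,  1000 },	/* FW2X_RATE_1G   */
		{ 0x020,   100 },	/* FW2X_RATE_100M */
	};
	unsigned int i;

	for (i = 0; i < sizeof(rates) / sizeof(rates[0]); i++)
		if (mpi_state & rates[i].bit)
			return rates[i].mbps;
	return 0;			/* no rate bit set: link down */
}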
*)mac_addr); + + if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) { + unsigned int rnd = 0; + + get_random_bytes(&rnd, sizeof(unsigned int)); + + l = 0xE3000000U + | (0xFFFFU & rnd) + | (0x00 << 16); + h = 0x8001300EU; + + mac[5] = (u8)(0xFFU & l); + l >>= 8; + mac[4] = (u8)(0xFFU & l); + l >>= 8; + mac[3] = (u8)(0xFFU & l); + l >>= 8; + mac[2] = (u8)(0xFFU & l); + mac[1] = (u8)(0xFFU & h); + h >>= 8; + mac[0] = (u8)(0xFFU & h); + } + return err; +} + +static int aq_fw2x_update_stats(struct aq_hw_s *self) +{ + int err = 0; + u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR); + u32 orig_stats_val = mpi_opts & BIT(CAPS_HI_STATISTICS); + + /* Toggle statistics bit for FW to update */ + mpi_opts = mpi_opts ^ BIT(CAPS_HI_STATISTICS); + aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts); + + /* Wait FW to report back */ + AQ_HW_WAIT_FOR(orig_stats_val != + (aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) & + BIT(CAPS_HI_STATISTICS)), + 1U, 10000U); + if (err) + return err; + + return hw_atl_utils_update_stats(self); +} + +const struct aq_fw_ops aq_fw_2x_ops = { + .init = aq_fw2x_init, + .reset = NULL, + .get_mac_permanent = aq_fw2x_get_mac_permanent, + .set_link_speed = aq_fw2x_set_link_speed, + .set_state = aq_fw2x_set_state, + .update_link_status = aq_fw2x_update_link_status, + .update_stats = aq_fw2x_update_stats, +}; diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h index 9009f2651e70..5265b937677b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/ver.h +++ b/drivers/net/ethernet/aquantia/atlantic/ver.h @@ -10,9 +10,9 @@ #ifndef VER_H #define VER_H -#define NIC_MAJOR_DRIVER_VERSION 1 -#define NIC_MINOR_DRIVER_VERSION 6 -#define NIC_BUILD_DRIVER_VERSION 13 +#define NIC_MAJOR_DRIVER_VERSION 2 +#define NIC_MINOR_DRIVER_VERSION 0 +#define NIC_BUILD_DRIVER_VERSION 2 #define NIC_REVISION_DRIVER_VERSION 0 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern" diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 7919f6112ecf..5e34b34f7740 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -5818,8 +5818,8 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode) struct l2_fhdr *rx_hdr; int ret = -ENODEV; struct bnx2_napi *bnapi = &bp->bnx2_napi[0], *tx_napi; - struct bnx2_tx_ring_info *txr = &bnapi->tx_ring; - struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring; + struct bnx2_tx_ring_info *txr; + struct bnx2_rx_ring_info *rxr; tx_napi = bnapi; diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index 5713e83be08c..e2cdfa75673f 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -69,6 +69,7 @@ config CHELSIO_T4 depends on PCI && (IPV6 || IPV6=n) select FW_LOADER select MDIO + select ZLIB_DEFLATE ---help--- This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet adapter and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 6a015362c340..185fe8df7628 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3304,6 +3304,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->ethtool_ops = &cxgb_ethtool_ops; netdev->min_mtu = 81; netdev->max_mtu = ETH_MAX_MTU; + netdev->dev_port = pi->port_id; } pci_set_drvdata(pdev, adapter); diff 
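/*
 * Editor's note (not part of the patch): aq_fw2x_update_stats() above uses
 * a toggle handshake -- the driver flips BIT(CAPS_HI_STATISTICS) in the
 * CONTROL2 register, and the firmware acknowledges by mirroring the new
 * value into STATE2 once fresh statistics are in the mailbox.  The
 * handshake in isolation (fw2x_toggle_and_wait() is a hypothetical
 * helper; the registers and the AQ_HW_WAIT_FOR() idiom are the patch's):
 */
static int fw2x_toggle_and_wait(struct aq_hw_s *self, u32 bit)
{
	u32 ctrl = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
	u32 old = ctrl & bit;
	int err = 0;

	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, ctrl ^ bit);

	/* Done when STATE2 no longer carries the pre-toggle value. */
	AQ_HW_WAIT_FOR(old !=
		       (aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
			bit),
		       1U, 10000U);
	return err;	/* AQ_HW_WAIT_FOR() sets err on timeout */
}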
--git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 8c9c6b0d2e5d..53b6a02c778e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \ - cudbg_common.o cudbg_lib.o + cudbg_common.o cudbg_lib.o cudbg_zlib.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c index f78ba1743b5a..8edc49827af0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c @@ -19,7 +19,8 @@ #include "cudbg_if.h" #include "cudbg_lib_common.h" -int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, +int cudbg_get_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pdbg_buff, u32 size, struct cudbg_buffer *pin_buff) { u32 offset; @@ -28,17 +29,30 @@ int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, if (offset + size > pdbg_buff->size) return CUDBG_STATUS_NO_MEM; + if (pdbg_init->compress_type != CUDBG_COMPRESSION_NONE) { + if (size > pdbg_init->compress_buff_size) + return CUDBG_STATUS_NO_MEM; + + pin_buff->data = (char *)pdbg_init->compress_buff; + pin_buff->offset = 0; + pin_buff->size = size; + return 0; + } + pin_buff->data = (char *)pdbg_buff->data + offset; pin_buff->offset = offset; pin_buff->size = size; - pdbg_buff->size -= size; return 0; } -void cudbg_put_buff(struct cudbg_buffer *pin_buff, - struct cudbg_buffer *pdbg_buff) +void cudbg_put_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pin_buff) { - pdbg_buff->size += pin_buff->size; + /* Clear compression buffer for re-use */ + if (pdbg_init->compress_type != CUDBG_COMPRESSION_NONE) + memset(pdbg_init->compress_buff, 0, + pdbg_init->compress_buff_size); + pin_buff->data = NULL; pin_buff->offset = 0; pin_buff->size = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h index 88e740082a02..8568a51f6414 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -87,6 +87,10 @@ struct cudbg_init { struct adapter *adap; /* Pointer to adapter structure */ void *outbuf; /* Output buffer */ u32 outbuf_size; /* Output buffer size */ + u8 compress_type; /* Type of compression to use */ + void *compress_buff; /* Compression buffer */ + u32 compress_buff_size; /* Compression buffer size */ + void *workspace; /* Workspace for zlib */ }; static inline unsigned int cudbg_mbytes_to_bytes(unsigned int size) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 0a3871f10787..8b95117c2923 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -23,12 +23,57 @@ #include "cudbg_lib_common.h" #include "cudbg_entity.h" #include "cudbg_lib.h" +#include "cudbg_zlib.h" -static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff, - struct cudbg_buffer *dbg_buff) +static int cudbg_do_compression(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pin_buff, + struct cudbg_buffer *dbg_buff) { - cudbg_update_buff(pin_buff, 
dbg_buff); - cudbg_put_buff(pin_buff, dbg_buff); + struct cudbg_buffer temp_in_buff = { 0 }; + int bytes_left, bytes_read, bytes; + u32 offset = dbg_buff->offset; + int rc; + + temp_in_buff.offset = pin_buff->offset; + temp_in_buff.data = pin_buff->data; + temp_in_buff.size = pin_buff->size; + + bytes_left = pin_buff->size; + bytes_read = 0; + while (bytes_left > 0) { + /* Do compression in smaller chunks */ + bytes = min_t(unsigned long, bytes_left, + (unsigned long)CUDBG_CHUNK_SIZE); + temp_in_buff.data = (char *)pin_buff->data + bytes_read; + temp_in_buff.size = bytes; + rc = cudbg_compress_buff(pdbg_init, &temp_in_buff, dbg_buff); + if (rc) + return rc; + bytes_left -= bytes; + bytes_read += bytes; + } + + pin_buff->size = dbg_buff->offset - offset; + return 0; +} + +static int cudbg_write_and_release_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pin_buff, + struct cudbg_buffer *dbg_buff) +{ + int rc = 0; + + if (pdbg_init->compress_type == CUDBG_COMPRESSION_NONE) { + cudbg_update_buff(pin_buff, dbg_buff); + } else { + rc = cudbg_do_compression(pdbg_init, pin_buff, dbg_buff); + if (rc) + goto out; + } + +out: + cudbg_put_buff(pdbg_init, pin_buff); + return rc; } static int is_fw_attached(struct cudbg_init *pdbg_init) @@ -371,12 +416,11 @@ int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, else if (is_t5(padap->params.chip) || is_t6(padap->params.chip)) buf_size = T5_REGMAP_SIZE; - rc = cudbg_get_buff(dbg_buff, buf_size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, buf_size, &temp_buff); if (rc) return rc; t4_get_regs(padap, (void *)temp_buff.data, temp_buff.size); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, @@ -395,7 +439,7 @@ int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, } dparams = &padap->params.devlog; - rc = cudbg_get_buff(dbg_buff, dparams->size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, dparams->size, &temp_buff); if (rc) return rc; @@ -410,12 +454,11 @@ int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, spin_unlock(&padap->win0_lock); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, @@ -436,14 +479,14 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, } size += sizeof(cfg); - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; rc = t4_cim_read(padap, UP_UP_DBG_LA_CFG_A, 1, &cfg); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } @@ -453,11 +496,10 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, NULL); if (rc < 0) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, @@ -469,7 +511,7 @@ int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, int size, rc; size = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = 
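/*
 * Editor's note (not part of the patch): cudbg_do_compression() above
 * feeds the captured entity to cudbg_compress_buff() in CUDBG_CHUNK_SIZE
 * pieces so the zlib workspace stays bounded, advancing an offset until
 * the input is drained.  The loop skeleton, reduced to its shape
 * (compress_in_chunks() and process_chunk() are hypothetical):
 */
static int compress_in_chunks(const char *data, unsigned long size,
			      unsigned long chunk_sz,
			      int (*process_chunk)(const char *p,
						   unsigned long n))
{
	unsigned long done = 0;

	while (done < size) {
		unsigned long n = size - done;
		int rc;

		if (n > chunk_sz)
			n = chunk_sz;	/* bound each compression call */
		rc = process_chunk(data + done, n);
		if (rc)
			return rc;	/* stop on the first failure */
		done += n;
	}
	return 0;
}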
cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -477,8 +519,7 @@ int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, (u32 *)temp_buff.data, (u32 *)((char *)temp_buff.data + 5 * CIM_MALA_SIZE)); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, @@ -490,7 +531,7 @@ int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, struct cudbg_cim_qcfg *cim_qcfg_data; int rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_cim_qcfg), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_cim_qcfg), &temp_buff); if (rc) return rc; @@ -501,7 +542,7 @@ int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, ARRAY_SIZE(cim_qcfg_data->stat), cim_qcfg_data->stat); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } @@ -510,14 +551,13 @@ int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, cim_qcfg_data->obq_wr); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } t4_read_cimq_cfg(padap, cim_qcfg_data->base, cim_qcfg_data->size, cim_qcfg_data->thres); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, @@ -531,7 +571,7 @@ static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, /* collect CIM IBQ */ qsize = CIM_IBQ_SIZE * 4 * sizeof(u32); - rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, qsize, &temp_buff); if (rc) return rc; @@ -545,11 +585,10 @@ static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, else rc = no_of_read_words; cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, @@ -616,7 +655,7 @@ static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, /* collect CIM OBQ */ qsize = cudbg_cim_obq_size(padap, qid); - rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, qsize, &temp_buff); if (rc) return rc; @@ -630,11 +669,10 @@ static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, else rc = no_of_read_words; cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init, @@ -887,7 +925,7 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, bytes = min_t(unsigned long, bytes_left, (unsigned long)CUDBG_CHUNK_SIZE); - rc = cudbg_get_buff(dbg_buff, bytes, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, bytes, &temp_buff); if (rc) return rc; @@ -906,14 +944,19 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, spin_unlock(&padap->win0_lock); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } skip_read: bytes_left -= bytes; bytes_read += bytes; - cudbg_write_and_release_buff(&temp_buff, dbg_buff); + 
rc = cudbg_write_and_release_buff(pdbg_init, &temp_buff, + dbg_buff); + if (rc) { + cudbg_put_buff(pdbg_init, &temp_buff); + return rc; + } } return rc; } @@ -1007,18 +1050,18 @@ int cudbg_collect_rss(struct cudbg_init *pdbg_init, int rc, nentries; nentries = t4_chip_rss_size(padap); - rc = cudbg_get_buff(dbg_buff, nentries * sizeof(u16), &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, nentries * sizeof(u16), + &temp_buff); if (rc) return rc; rc = t4_read_rss(padap, (u16 *)temp_buff.data); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, @@ -1031,7 +1074,7 @@ int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, int vf, rc, vf_count; vf_count = padap->params.arch.vfcount; - rc = cudbg_get_buff(dbg_buff, + rc = cudbg_get_buff(pdbg_init, dbg_buff, vf_count * sizeof(struct cudbg_rss_vf_conf), &temp_buff); if (rc) @@ -1041,8 +1084,7 @@ int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, for (vf = 0; vf < vf_count; vf++) t4_read_rss_vf_config(padap, vf, &vfconf[vf].rss_vf_vfl, &vfconf[vf].rss_vf_vfh, true); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init, @@ -1053,13 +1095,13 @@ int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init, struct cudbg_buffer temp_buff = { 0 }; int rc; - rc = cudbg_get_buff(dbg_buff, NMTUS * sizeof(u16), &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, NMTUS * sizeof(u16), + &temp_buff); if (rc) return rc; t4_read_mtu_tbl(padap, (u16 *)temp_buff.data, NULL); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, @@ -1071,7 +1113,7 @@ int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, struct cudbg_pm_stats *pm_stats_buff; int rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pm_stats), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_pm_stats), &temp_buff); if (rc) return rc; @@ -1079,8 +1121,7 @@ int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, pm_stats_buff = (struct cudbg_pm_stats *)temp_buff.data; t4_pmtx_get_stats(padap, pm_stats_buff->tx_cnt, pm_stats_buff->tx_cyc); t4_pmrx_get_stats(padap, pm_stats_buff->rx_cnt, pm_stats_buff->rx_cyc); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, @@ -1095,7 +1136,7 @@ int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, if (!padap->params.vpd.cclk) return CUDBG_STATUS_CCLK_NOT_DEFINED; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_hw_sched), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_hw_sched), &temp_buff); hw_sched_buff = (struct cudbg_hw_sched *)temp_buff.data; hw_sched_buff->map = t4_read_reg(padap, TP_TX_MOD_QUEUE_REQ_MAP_A); @@ -1104,8 +1145,7 @@ int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, for (i = 0; i < NTX_SCHED; ++i) t4_get_tx_sched(padap, i, &hw_sched_buff->kbps[i], &hw_sched_buff->ipg[i], true); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return 
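/*
 * Editor's note (not part of the patch): every cudbg_collect_*() converted
 * in this patch now follows one lifecycle -- reserve scratch with
 * cudbg_get_buff(), fill it from hardware, then hand it back through
 * cudbg_write_and_release_buff(), which copies or compresses it into the
 * dump; on a read error the scratch is dropped with cudbg_put_buff()
 * instead.  The lifecycle in outline (entity_fill() is a hypothetical
 * stand-in for the per-entity hardware reads):
 */
static int cudbg_collect_entity(struct cudbg_init *pdbg_init,
				struct cudbg_buffer *dbg_buff, u32 size)
{
	struct cudbg_buffer temp_buff = { 0 };
	int rc;

	rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff);
	if (rc)
		return rc;

	rc = entity_fill(temp_buff.data, temp_buff.size);
	if (rc) {
		cudbg_put_buff(pdbg_init, &temp_buff);	/* drop scratch */
		return rc;
	}

	return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
}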
cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, @@ -1129,7 +1169,7 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, n = n / (IREG_NUM_ELEM * sizeof(u32)); size = sizeof(struct ireg_buf) * n; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1218,8 +1258,7 @@ int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, tp_pio->ireg_local_offset, true); ch_tp_pio++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, @@ -1231,7 +1270,8 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct ireg_buf *ch_sge_dbg; int i, rc; - rc = cudbg_get_buff(dbg_buff, sizeof(*ch_sge_dbg) * 2, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(*ch_sge_dbg) * 2, + &temp_buff); if (rc) return rc; @@ -1252,8 +1292,7 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, sge_pio->ireg_local_offset); ch_sge_dbg++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, @@ -1265,7 +1304,7 @@ int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, struct cudbg_ulprx_la *ulprx_la_buff; int rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulprx_la), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_ulprx_la), &temp_buff); if (rc) return rc; @@ -1273,8 +1312,7 @@ int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, ulprx_la_buff = (struct cudbg_ulprx_la *)temp_buff.data; t4_ulprx_read_la(padap, (u32 *)ulprx_la_buff->data); ulprx_la_buff->size = ULPRX_LA_SIZE; - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, @@ -1287,15 +1325,14 @@ int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, int size, rc; size = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; tp_la_buff = (struct cudbg_tp_la *)temp_buff.data; tp_la_buff->mode = DBGLAMODE_G(t4_read_reg(padap, TP_DBG_LA_CONFIG_A)); t4_tp_read_la(padap, (u64 *)tp_la_buff->data, NULL); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_meminfo(struct cudbg_init *pdbg_init, @@ -1307,7 +1344,8 @@ int cudbg_collect_meminfo(struct cudbg_init *pdbg_init, struct cudbg_meminfo *meminfo_buff; int rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_meminfo), &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_meminfo), + &temp_buff); if (rc) return rc; @@ -1315,12 +1353,11 @@ int cudbg_collect_meminfo(struct cudbg_init *pdbg_init, rc = cudbg_fill_meminfo(padap, meminfo_buff); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, @@ -1334,7 +1371,7 @@ int 
cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, size = sizeof(struct cudbg_cim_pif_la) + 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1343,8 +1380,7 @@ int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, t4_cim_read_pif_la(padap, (u32 *)cim_pif_la_buff->data, (u32 *)cim_pif_la_buff->data + 6 * CIM_PIFLA_SIZE, NULL, NULL); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, @@ -1360,7 +1396,7 @@ int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, if (!padap->params.vpd.cclk) return CUDBG_STATUS_CCLK_NOT_DEFINED; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_clk_info), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_clk_info), &temp_buff); if (rc) return rc; @@ -1392,8 +1428,7 @@ int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, clk_info_buff->finwait2_timer = tp_tick_us * t4_read_reg(padap, TP_FINWAIT2_TIMER_A); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, @@ -1408,7 +1443,7 @@ int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); size = sizeof(struct ireg_buf) * n * 2; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1449,8 +1484,7 @@ int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, pcie_pio->ireg_local_offset); ch_pcie++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, @@ -1465,7 +1499,7 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); size = sizeof(struct ireg_buf) * n * 2; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1506,8 +1540,7 @@ int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, pm_pio->ireg_local_offset); ch_pm++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_tid(struct cudbg_init *pdbg_init, @@ -1521,7 +1554,8 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, u32 para[2], val[2]; int rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_tid_info_region_rev1), + rc = cudbg_get_buff(pdbg_init, dbg_buff, + sizeof(struct cudbg_tid_info_region_rev1), &temp_buff); if (rc) return rc; @@ -1544,7 +1578,7 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2, para, val); if (rc < 0) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } tid->uotid_base = val[0]; @@ -1563,7 +1597,7 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, para, val); if (rc < 0) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } tid->hpftid_base = val[0]; @@ -1591,8 +1625,7 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init, #undef 
FW_PARAM_PFVF_A - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_pcie_config(struct cudbg_init *pdbg_init, @@ -1606,7 +1639,7 @@ int cudbg_collect_pcie_config(struct cudbg_init *pdbg_init, size = sizeof(u32) * CUDBG_NUM_PCIE_CONFIG_REGS; n = sizeof(t5_pcie_config_array) / (2 * sizeof(u32)); - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1618,8 +1651,7 @@ int cudbg_collect_pcie_config(struct cudbg_init *pdbg_init, value++; } } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } static int cudbg_sge_ctxt_check_valid(u32 *buf, int type) @@ -1799,7 +1831,7 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, return CUDBG_STATUS_ENTITY_NOT_FOUND; size = rc; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1813,7 +1845,7 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, ctx_buf = kvzalloc(max_ctx_size, GFP_KERNEL); if (!ctx_buf) { - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return -ENOMEM; } @@ -1876,8 +1908,7 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, */ cudbg_get_sge_ctxt_fw(pdbg_init, max_ctx_qid, CTXT_FLM, &buff); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } static inline void cudbg_tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask) @@ -2038,7 +2069,7 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, n = padap->params.arch.mps_tcam_size; size = sizeof(struct cudbg_mps_tcam) * n; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -2047,7 +2078,7 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, rc = cudbg_collect_tcam_index(padap, tcam, i); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } total_size += sizeof(struct cudbg_mps_tcam); @@ -2057,11 +2088,10 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, if (!total_size) { rc = CUDBG_SYSTEM_ERROR; cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, @@ -2112,7 +2142,7 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, if (rc) return rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_vpd_data), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_vpd_data), &temp_buff); if (rc) return rc; @@ -2128,8 +2158,7 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(fw_vers); vpd_data->fw_micro = FW_HDR_FW_VER_MICRO_G(fw_vers); vpd_data->fw_build = FW_HDR_FW_VER_BUILD_G(fw_vers); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } static int cudbg_read_tid(struct cudbg_init *pdbg_init, u32 tid, @@ -2280,7 +2309,7 @@ int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, size = sizeof(struct 
cudbg_tid_data) * tcam_region.max_tid; size += sizeof(struct cudbg_tcam); - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -2292,7 +2321,7 @@ int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, rc = cudbg_read_tid(pdbg_init, i, tid_data); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } @@ -2303,8 +2332,7 @@ int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, bytes += sizeof(struct cudbg_tid_data); } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, @@ -2317,13 +2345,12 @@ int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, int rc; size = sizeof(u16) * NMTUS * NCCTRL_WIN; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; t4_read_cong_tbl(padap, (void *)temp_buff.data); - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, @@ -2341,7 +2368,7 @@ int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, n = sizeof(t6_ma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32)); size = sizeof(struct ireg_buf) * n * 2; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -2377,8 +2404,7 @@ int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, } ma_indr++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, @@ -2391,7 +2417,7 @@ int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, u32 i, j; int rc; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulptx_la), + rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_ulptx_la), &temp_buff); if (rc) return rc; @@ -2412,8 +2438,7 @@ int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, t4_read_reg(padap, ULP_TX_LA_RDDATA_0_A + 0x10 * i); } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, @@ -2438,7 +2463,7 @@ int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, return CUDBG_STATUS_NOT_IMPLEMENTED; size = sizeof(struct ireg_buf) * n; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -2488,14 +2513,13 @@ int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, up_cim_reg->ireg_local_offset + (j * local_offset), local_range, buff); if (rc) { - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } } up_cim++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, @@ -2508,7 +2532,8 @@ int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, int i, rc; u32 addr; - rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pbt_tables), + rc = cudbg_get_buff(pdbg_init, dbg_buff, + sizeof(struct cudbg_pbt_tables), &temp_buff); if (rc) return rc; @@ -2521,7 
+2546,7 @@ int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, &pbt->pbt_dynamic[i]); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } } @@ -2534,7 +2559,7 @@ int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, &pbt->pbt_static[i]); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } } @@ -2546,7 +2571,7 @@ int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, &pbt->lrf_table[i]); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } } @@ -2558,12 +2583,11 @@ int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, &pbt->pbt_data[i]); if (rc) { cudbg_err->sys_err = rc; - cudbg_put_buff(&temp_buff, dbg_buff); + cudbg_put_buff(pdbg_init, &temp_buff); return rc; } } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, @@ -2584,7 +2608,7 @@ int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, log = padap->mbox_log; mbox_cmds = padap->mbox_log->size; size = sizeof(struct cudbg_mbox_log) * mbox_cmds; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -2607,8 +2631,7 @@ int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, } mboxlog++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, @@ -2626,7 +2649,7 @@ int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, n = sizeof(t6_hma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32)); size = sizeof(struct ireg_buf) * n; - rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -2644,6 +2667,5 @@ int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, hma_fli->ireg_local_offset); hma_indr++; } - cudbg_write_and_release_buff(&temp_buff, dbg_buff); - return rc; + return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h index 24b33f28e548..8150ea85d6a5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h @@ -26,6 +26,7 @@ enum cudbg_dump_type { enum cudbg_compression_type { CUDBG_COMPRESSION_NONE = 1, + CUDBG_COMPRESSION_ZLIB, }; struct cudbg_hdr { @@ -78,10 +79,11 @@ struct cudbg_error { #define CDUMP_MAX_COMP_BUF_SIZE ((64 * 1024) - 1) #define CUDBG_CHUNK_SIZE ((CDUMP_MAX_COMP_BUF_SIZE / 1024) * 1024) -int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, +int cudbg_get_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pdbg_buff, u32 size, struct cudbg_buffer *pin_buff); -void cudbg_put_buff(struct cudbg_buffer *pin_buff, - struct cudbg_buffer *pdbg_buff); +void cudbg_put_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pin_buff); void cudbg_update_buff(struct cudbg_buffer *pin_buff, struct cudbg_buffer *pout_buff); #endif /* __CUDBG_LIB_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c new file mode 100644 index 000000000000..25cc06d75cff 
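All of the collector conversions above are one mechanical pattern: cudbg_get_buff() and cudbg_put_buff() now carry the cudbg_init context, and cudbg_write_and_release_buff() becomes the tail call because, once the zlib path below is in play, committing a chunk can itself fail and that status has to propagate. A minimal sketch of the resulting collector shape; cudbg_collect_foo(), FOO_SIZE and cudbg_read_foo() are hypothetical, only the cudbg_* helpers declared just above are real:

static int cudbg_collect_foo(struct cudbg_init *pdbg_init,
                             struct cudbg_buffer *dbg_buff,
                             struct cudbg_error *cudbg_err)
{
        struct cudbg_buffer temp_buff = { 0 };
        int rc;

        /* reserve FOO_SIZE bytes of destination space */
        rc = cudbg_get_buff(pdbg_init, dbg_buff, FOO_SIZE, &temp_buff);
        if (rc)
                return rc;

        rc = cudbg_read_foo(pdbg_init->adap, temp_buff.data);
        if (rc) {
                cudbg_err->sys_err = rc;
                cudbg_put_buff(pdbg_init, &temp_buff); /* undo reservation */
                return rc;
        }

        /* commit the chunk; with compression enabled this is where the
         * deflate happens, so it can fail and must be returned
         */
        return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
}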
--- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2018 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/zlib.h> + +#include "cxgb4.h" +#include "cudbg_if.h" +#include "cudbg_lib_common.h" +#include "cudbg_zlib.h" + +static int cudbg_get_compress_hdr(struct cudbg_buffer *pdbg_buff, + struct cudbg_buffer *pin_buff) +{ + if (pdbg_buff->offset + sizeof(struct cudbg_compress_hdr) > + pdbg_buff->size) + return CUDBG_STATUS_NO_MEM; + + pin_buff->data = (char *)pdbg_buff->data + pdbg_buff->offset; + pin_buff->offset = 0; + pin_buff->size = sizeof(struct cudbg_compress_hdr); + pdbg_buff->offset += sizeof(struct cudbg_compress_hdr); + return 0; +} + +int cudbg_compress_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pout_buff) +{ + struct cudbg_buffer temp_buff = { 0 }; + struct z_stream_s compress_stream; + struct cudbg_compress_hdr *c_hdr; + int rc; + + /* Write compression header to output buffer before compression */ + rc = cudbg_get_compress_hdr(pout_buff, &temp_buff); + if (rc) + return rc; + + c_hdr = (struct cudbg_compress_hdr *)temp_buff.data; + c_hdr->compress_id = CUDBG_ZLIB_COMPRESS_ID; + + memset(&compress_stream, 0, sizeof(struct z_stream_s)); + compress_stream.workspace = pdbg_init->workspace; + rc = zlib_deflateInit2(&compress_stream, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, CUDBG_ZLIB_WIN_BITS, + CUDBG_ZLIB_MEM_LVL, Z_DEFAULT_STRATEGY); + if (rc != Z_OK) + return CUDBG_SYSTEM_ERROR; + + compress_stream.next_in = pin_buff->data; + compress_stream.avail_in = pin_buff->size; + compress_stream.next_out = pout_buff->data + pout_buff->offset; + compress_stream.avail_out = pout_buff->size - pout_buff->offset; + + rc = zlib_deflate(&compress_stream, Z_FINISH); + if (rc != Z_STREAM_END) + return CUDBG_SYSTEM_ERROR; + + rc = zlib_deflateEnd(&compress_stream); + if (rc != Z_OK) + return CUDBG_SYSTEM_ERROR; + + c_hdr->compress_size = compress_stream.total_out; + c_hdr->decompress_size = pin_buff->size; + pout_buff->offset += compress_stream.total_out; + + return 0; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h new file mode 100644 index 000000000000..60d23805dfc3 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2018 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_ZLIB_H__ +#define __CUDBG_ZLIB_H__ + +#include <linux/zlib.h> + +#define CUDBG_ZLIB_COMPRESS_ID 17 +#define CUDBG_ZLIB_WIN_BITS 12 +#define CUDBG_ZLIB_MEM_LVL 4 + +struct cudbg_compress_hdr { + u32 compress_id; + u64 decompress_size; + u64 compress_size; + u64 rsvd[32]; +}; + +static inline int cudbg_get_workspace_size(void) +{ + return zlib_deflate_workspacesize(CUDBG_ZLIB_WIN_BITS, + CUDBG_ZLIB_MEM_LVL); +} + +int cudbg_compress_buff(struct cudbg_init *pdbg_init, + struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pout_buff); +#endif /* __CUDBG_ZLIB_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index f05b58f74c7a..429467364219 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -820,6 +820,7 @@ struct vf_info { unsigned char vf_mac_addr[ETH_ALEN]; unsigned int tx_rate; bool pf_set_mac; + u16 vlan; }; struct mbox_list { @@ -846,6 +847,8 @@ struct adapter { int msg_enable; __be16 vxlan_port; u8 vxlan_port_cnt; + __be16 geneve_port; + u8 geneve_port_cnt; struct adapter_params params; struct cxgb4_virt_res vres; @@ -1736,4 +1739,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); void free_tx_desc(struct adapter *adap, struct sge_txq *q, unsigned int n, bool unmap); void free_txq(struct adapter *adap, struct sge_txq *q); +int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, + u16 vlan); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index a2d6c8a69c52..30485f9a598f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -18,6 +18,7 @@ #include "t4_regs.h" #include "cxgb4.h" #include "cxgb4_cudbg.h" +#include "cudbg_zlib.h" static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = { { CUDBG_EDC0, cudbg_collect_edc0_meminfo }, @@ -318,6 +319,7 @@ u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) { u32 i, entity; u32 len = 0; + u32 wsize; if (flag & CXGB4_ETH_DUMP_HW) { for (i = 0; i < ARRAY_SIZE(cxgb4_collect_hw_dump); i++) { @@ -333,6 +335,11 @@ u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) } } + /* If compression is enabled, a smaller destination buffer is enough */ + wsize = cudbg_get_workspace_size(); + if (wsize && len > CUDBG_DUMP_BUFF_SIZE) + len = CUDBG_DUMP_BUFF_SIZE; + return len; } @@ -341,22 +348,14 @@ static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init, const struct cxgb4_collect_entity *e_arr, u32 arr_size, void *buf, u32 *tot_size) { - struct adapter *adap = pdbg_init->adap; struct cudbg_error cudbg_err = { 0 }; struct cudbg_entity_hdr *entity_hdr; - u32 entity_size, i; - u32 total_size = 0; + u32 i, total_size = 0; int ret; for (i = 0; i < arr_size; i++) { const struct cxgb4_collect_entity *e = &e_arr[i]; - /* Skip entities that won't fit in output buffer */ - entity_size = cxgb4_get_entity_length(adap, e->entity); - if (entity_size > - pdbg_init->outbuf_size - *tot_size - total_size) - continue; - entity_hdr = cudbg_get_entity_hdr(buf, e->entity); entity_hdr->entity_type = e->entity; entity_hdr->start_offset = dbg_buff->offset; @@ -382,16 +381,40 @@ static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init, *tot_size += total_size; } +static int 
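Each compressed chunk produced by cudbg_compress_buff() is laid out as a struct cudbg_compress_hdr immediately followed by compress_size bytes of raw deflate output. A minimal consumer sketch using the in-kernel zlib API; cudbg_inflate_chunk() is a hypothetical name, dst is assumed to hold at least hdr->decompress_size bytes, and the inflate window bits must cover the CUDBG_ZLIB_WIN_BITS used for deflate above:

#include <linux/zlib.h>
#include <linux/vmalloc.h>

static int cudbg_inflate_chunk(void *src, void *dst)
{
        struct cudbg_compress_hdr *hdr = src;
        struct z_stream_s stream;
        int rc;

        if (hdr->compress_id != CUDBG_ZLIB_COMPRESS_ID)
                return -EINVAL;

        memset(&stream, 0, sizeof(stream));
        stream.workspace = vzalloc(zlib_inflate_workspacesize());
        if (!stream.workspace)
                return -ENOMEM;

        rc = zlib_inflateInit2(&stream, CUDBG_ZLIB_WIN_BITS);
        if (rc != Z_OK) {
                rc = -EIO;
                goto out;
        }

        stream.next_in = (u8 *)(hdr + 1);       /* data follows the header */
        stream.avail_in = (u32)hdr->compress_size;
        stream.next_out = dst;
        stream.avail_out = (u32)hdr->decompress_size;

        rc = zlib_inflate(&stream, Z_FINISH);
        rc = (rc == Z_STREAM_END) ? 0 : -EIO;
        zlib_inflateEnd(&stream);
out:
        vfree(stream.workspace);
        return rc;
}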
cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init) +{ + u32 workspace_size; + + workspace_size = cudbg_get_workspace_size(); + pdbg_init->compress_buff = vzalloc(CUDBG_COMPRESS_BUFF_SIZE + + workspace_size); + if (!pdbg_init->compress_buff) + return -ENOMEM; + + pdbg_init->compress_buff_size = CUDBG_COMPRESS_BUFF_SIZE; + pdbg_init->workspace = (u8 *)pdbg_init->compress_buff + + CUDBG_COMPRESS_BUFF_SIZE - workspace_size; + return 0; +} + +static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init) +{ + if (pdbg_init->compress_buff) + vfree(pdbg_init->compress_buff); +} + int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, u32 flag) { - struct cudbg_init cudbg_init = { 0 }; struct cudbg_buffer dbg_buff = { 0 }; u32 size, min_size, total_size = 0; + struct cudbg_init cudbg_init; struct cudbg_hdr *cudbg_hdr; + int rc; size = *buf_size; + memset(&cudbg_init, 0, sizeof(struct cudbg_init)); cudbg_init.adap = adap; cudbg_init.outbuf = buf; cudbg_init.outbuf_size = size; @@ -408,7 +431,6 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, cudbg_hdr->max_entities = CUDBG_MAX_ENTITY; cudbg_hdr->chip_ver = adap->params.chip; cudbg_hdr->dump_type = CUDBG_DUMP_TYPE_MINI; - cudbg_hdr->compress_type = CUDBG_COMPRESSION_NONE; min_size = sizeof(struct cudbg_hdr) + sizeof(struct cudbg_entity_hdr) * @@ -416,6 +438,24 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, if (size < min_size) return -ENOMEM; + rc = cudbg_get_workspace_size(); + if (rc) { + /* Zlib available. So, use zlib deflate */ + cudbg_init.compress_type = CUDBG_COMPRESSION_ZLIB; + rc = cudbg_alloc_compress_buff(&cudbg_init); + if (rc) { + /* Ignore error and continue without compression. */ + dev_warn(adap->pdev_dev, + "Failed to allocate compression buffer, ret: %d. 
Continuing without compression.\n", + rc); + cudbg_init.compress_type = CUDBG_COMPRESSION_NONE; + rc = 0; + } + } else { + cudbg_init.compress_type = CUDBG_COMPRESSION_NONE; + } + + cudbg_hdr->compress_type = cudbg_init.compress_type; dbg_buff.offset += min_size; total_size = dbg_buff.offset; @@ -433,8 +473,12 @@ int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, buf, &total_size); + cudbg_free_compress_buff(&cudbg_init); cudbg_hdr->data_len = total_size; - *buf_size = total_size; + if (cudbg_init.compress_type != CUDBG_COMPRESSION_NONE) + *buf_size = size; + else + *buf_size = total_size; return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h index 7ceeb0bc9fa8..ce1ac9a1c878 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h @@ -23,6 +23,9 @@ #include "cudbg_entity.h" #include "cudbg_lib.h" +#define CUDBG_DUMP_BUFF_SIZE (32 * 1024 * 1024) /* 32 MB */ +#define CUDBG_COMPRESS_BUFF_SIZE (4 * 1024 * 1024) /* 4 MB */ + typedef int (*cudbg_collect_callback_t)(struct cudbg_init *pdbg_init, struct cudbg_buffer *dbg_buff, struct cudbg_error *cudbg_err); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 677a3ba83c1f..3177b0c9bd2d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -439,19 +439,32 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) if (ftid >= t->nftids) ftid = -1; } else { - ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2); - if (ftid < 0) - goto out_unlock; + if (is_t6(adap->params.chip)) { + ftid = bitmap_find_free_region(t->ftid_bmap, + t->nftids, 1); + if (ftid < 0) + goto out_unlock; + + /* this is only a lookup, keep the found region + * unallocated + */ + bitmap_release_region(t->ftid_bmap, ftid, 1); + } else { + ftid = bitmap_find_free_region(t->ftid_bmap, + t->nftids, 2); + if (ftid < 0) + goto out_unlock; - /* this is only a lookup, keep the found region unallocated */ - bitmap_release_region(t->ftid_bmap, ftid, 2); + bitmap_release_region(t->ftid_bmap, ftid, 2); + } } out_unlock: spin_unlock_bh(&t->ftid_lock); return ftid; } -static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) +static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family, + unsigned int chip_ver) { spin_lock_bh(&t->ftid_lock); @@ -460,22 +473,31 @@ static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) return -EBUSY; } - if (family == PF_INET) + if (family == PF_INET) { __set_bit(fidx, t->ftid_bmap); - else - bitmap_allocate_region(t->ftid_bmap, fidx, 2); + } else { + if (chip_ver < CHELSIO_T6) + bitmap_allocate_region(t->ftid_bmap, fidx, 2); + else + bitmap_allocate_region(t->ftid_bmap, fidx, 1); + } spin_unlock_bh(&t->ftid_lock); return 0; } -static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family) +static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, + unsigned int chip_ver) { spin_lock_bh(&t->ftid_lock); - if (family == PF_INET) + if (family == PF_INET) { __clear_bit(fidx, t->ftid_bmap); - else - bitmap_release_region(t->ftid_bmap, fidx, 2); + } else { + if (chip_ver < CHELSIO_T6) + bitmap_release_region(t->ftid_bmap, fidx, 2); + else + bitmap_release_region(t->ftid_bmap, fidx, 1); + } spin_unlock_bh(&t->ftid_lock); } @@ -1249,23 +1271,42 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, } } } else { /* 
IPv6 */ - /* Ensure that the IPv6 filter is aligned on a - * multiple of 4 boundary. - */ - if (filter_id & 0x3) { - dev_err(adapter->pdev_dev, - "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); - return -EINVAL; - } + if (chip_ver < CHELSIO_T6) { + /* Ensure that the IPv6 filter is aligned on a + * multiple of 4 boundary. + */ + if (filter_id & 0x3) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); + return -EINVAL; + } - /* Check all except the base overlapping IPv4 filter slots. */ - for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { + /* Check all except the base overlapping IPv4 filter + * slots. + */ + for (fidx = filter_id + 1; fidx < filter_id + 4; + fidx++) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", + fidx); + return -EBUSY; + } + } + } else { + /* For T6, CLIP being enabled, IPv6 filter would occupy + * 2 entries. + */ + if (filter_id & 0x1) + return -EINVAL; + /* Check overlapping IPv4 filter slot */ + fidx = filter_id + 1; f = &adapter->tids.ftid_tab[fidx]; if (f->valid) { - dev_err(adapter->pdev_dev, - "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", - fidx); - return -EINVAL; + pr_err("%s: IPv6 filter requires 2 indices. IPv4 filter already present at %d. Please remove IPv4 filter first.\n", + __func__, fidx); + return -EBUSY; } } } @@ -1279,16 +1320,18 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, fidx = filter_id + adapter->tids.ftid_base; ret = cxgb4_set_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET); + fs->type ? PF_INET6 : PF_INET, + chip_ver); if (ret) return ret; - /* Check to make sure the filter requested is writable ... */ ret = writable_filter(f); if (ret) { /* Clear the bits we have set above */ cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET); + fs->type ? PF_INET6 : PF_INET, + chip_ver); return ret; } @@ -1303,7 +1346,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, IPV6_ADDR_ANY) { ret = cxgb4_clip_get(dev, (const u32 *)&fs->val.lip, 1); if (ret) { - cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6); + cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6, + chip_ver); return ret; } } @@ -1333,7 +1377,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, ret = set_filter_wr(adapter, filter_id); if (ret) { cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET); + fs->type ? PF_INET6 : PF_INET, + chip_ver); clear_filter(adapter, f); } @@ -1411,6 +1456,7 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); struct filter_entry *f; unsigned int max_fidx; int ret; @@ -1436,7 +1482,8 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, if (f->valid) { f->ctx = ctx; cxgb4_clear_ftid(&adapter->tids, filter_id, - f->fs.type ? 
PF_INET6 : PF_INET, + chip_ver); return del_filter_wr(adapter, filter_id); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 11fe5961040a..f0fd2eba30c2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2783,7 +2783,30 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf, return 0; } -#endif +static int cxgb4_mgmt_set_vf_vlan(struct net_device *dev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + int ret; + + if (vf >= adap->num_vfs || vlan > 4095 || qos > 7) + return -EINVAL; + + if (vlan_proto != htons(ETH_P_8021Q) || qos != 0) + return -EPROTONOSUPPORT; + + ret = t4_set_vlan_acl(adap, adap->mbox, vf + 1, vlan); + if (!ret) { + adap->vfinfo[vf].vlan = vlan; + return 0; + } + + dev_err(adap->pdev_dev, "Err %d %s VLAN ACL for PF/VF %d/%d\n", + ret, (vlan ? "setting" : "clearing"), adap->pf, vf); + return ret; +} +#endif /* CONFIG_PCI_IOV */ static int cxgb_set_mac_addr(struct net_device *dev, void *p) { @@ -3020,6 +3043,17 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev, adapter->vxlan_port = 0; t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, 0); break; + case UDP_TUNNEL_TYPE_GENEVE: + if (!adapter->geneve_port_cnt || + adapter->geneve_port != ti->port) + return; /* Invalid GENEVE destination port */ + + adapter->geneve_port_cnt--; + if (adapter->geneve_port_cnt) + return; + + adapter->geneve_port = 0; + t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0); + break; default: return; } @@ -3055,17 +3089,11 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; int i, ret; - if (chip_ver < CHELSIO_T6) + if (chip_ver < CHELSIO_T6 || !adapter->rawf_cnt) return; switch (ti->type) { case UDP_TUNNEL_TYPE_VXLAN: - /* For T6 fw reserves last 2 entries for - * storing match all mac filter (config file entry). - */ - if (!adapter->rawf_cnt) - return; - /* Callback for adding vxlan port can be called with the same * port for both IPv4 and IPv6. We should not disable the * offloading when the same port for both protocols is added @@ -3091,6 +3119,26 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, VXLAN_V(be16_to_cpu(ti->port)) | VXLAN_EN_F); break; + case UDP_TUNNEL_TYPE_GENEVE: + if (adapter->geneve_port_cnt && + adapter->geneve_port == ti->port) { + adapter->geneve_port_cnt++; + return; + } + + /* We will support only one GENEVE port */ + if (adapter->geneve_port_cnt) { + netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n", + be16_to_cpu(adapter->geneve_port), + be16_to_cpu(ti->port)); + return; + } + + adapter->geneve_port = ti->port; + adapter->geneve_port_cnt = 1; + + t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, + GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F); + break; default: return; } @@ -3101,24 +3149,22 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, * we will remove this 'match all' entry and fallback to adding * exact match filters. 
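One note on the filter changes just above: bitmap_find_free_region() and friends operate on naturally aligned blocks of 2^order bits, which is where the 1 and 2 constants come from. Before T6 an IPv6 filter consumes four consecutive 4-aligned slots (order 2); on T6, with the address held in CLIP as the driver comment says, it needs only two 2-aligned slots (order 1). A hypothetical helper just to make the mapping explicit:

/* slots occupied by one filter, expressed as a bitmap region order */
static int cxgb4_ftid_order(const struct adapter *adap, int family)
{
        if (family == PF_INET)
                return 0;                        /* IPv4: 2^0 = 1 slot */

        /* IPv6: 2^2 = 4 slots before T6, 2^1 = 2 slots on T6 */
        return is_t6(adap->params.chip) ? 1 : 2;
}

/* allocation and release then pair up as, e.g.:
 *      ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, order);
 *      bitmap_release_region(t->ftid_bmap, ftid, order);
 */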
*/ - if (adapter->rawf_cnt) { - for_each_port(adapter, i) { - pi = adap2pinfo(adapter, i); - - ret = t4_alloc_raw_mac_filt(adapter, pi->viid, - match_all_mac, - match_all_mac, - adapter->rawf_start + - pi->port_id, - 1, pi->port_id, true); - if (ret < 0) { - netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n", - be16_to_cpu(ti->port)); - cxgb_del_udp_tunnel(netdev, ti); - return; - } - atomic_inc(&adapter->mps_encap[ret].refcnt); + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + + ret = t4_alloc_raw_mac_filt(adapter, pi->viid, + match_all_mac, + match_all_mac, + adapter->rawf_start + + pi->port_id, + 1, pi->port_id, true); + if (ret < 0) { + netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n", + be16_to_cpu(ti->port)); + cxgb_del_udp_tunnel(netdev, ti); + return; } + atomic_inc(&adapter->mps_encap[ret].refcnt); } } @@ -3184,6 +3230,7 @@ static const struct net_device_ops cxgb4_mgmt_netdev_ops = { .ndo_get_vf_config = cxgb4_mgmt_get_vf_config, .ndo_set_vf_rate = cxgb4_mgmt_set_vf_rate, .ndo_get_phys_port_id = cxgb4_mgmt_get_phys_port_id, + .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan, }; #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 9b9f3f99b39d..36563364bae7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -43,7 +43,7 @@ #define STATS_CHECK_PERIOD (HZ / 2) -struct ch_tc_pedit_fields pedits[] = { +static struct ch_tc_pedit_fields pedits[] = { PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0), PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4), PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0), @@ -111,6 +111,9 @@ static void cxgb4_process_flow_match(struct net_device *dev, ethtype_mask = 0; } + if (ethtype_key == ETH_P_IPV6) + fs->type = 1; + fs->val.ethtype = ethtype_key; fs->mask.ethtype = ethtype_mask; fs->val.proto = key->ip_proto; @@ -205,8 +208,8 @@ static void cxgb4_process_flow_match(struct net_device *dev, VLAN_PRIO_SHIFT); vlan_tci_mask = mask->vlan_id | (mask->vlan_priority << VLAN_PRIO_SHIFT); - fs->val.ivlan = cpu_to_be16(vlan_tci); - fs->mask.ivlan = cpu_to_be16(vlan_tci_mask); + fs->val.ivlan = vlan_tci; + fs->mask.ivlan = vlan_tci_mask; /* Chelsio adapters use ivlan_vld bit to match vlan packets * as 802.1Q. 
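The ivlan assignment fixed above is an endianness repair: vlan_id and vlan_priority come out of the flower match as host-order integers, and fs->{val,mask}.ivlan are plain u16 fields consumed as host-order values further down the filter path, so the old cpu_to_be16() effectively double-converted them on little-endian hosts (which is what the fix restores). The TCI packing for reference, with VLAN_PRIO_SHIFT being 13 from <linux/if_vlan.h>; example_fill_ivlan() is only an illustration:

#include <linux/if_vlan.h>

/* 802.1Q TCI layout: [15:13] PCP | [12] DEI | [11:0] VID */
static void example_fill_ivlan(struct ch_filter_specification *fs,
                               const struct flow_dissector_key_vlan *key,
                               const struct flow_dissector_key_vlan *mask)
{
        u16 vlan_tci, vlan_tci_mask;

        vlan_tci = key->vlan_id | (key->vlan_priority << VLAN_PRIO_SHIFT);
        vlan_tci_mask = mask->vlan_id |
                        (mask->vlan_priority << VLAN_PRIO_SHIFT);

        fs->val.ivlan = vlan_tci;        /* host-endian, no byte swap */
        fs->mask.ivlan = vlan_tci_mask;
}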
Also, when vlan tag is present in packets, diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index eab781fab2a8..a7af71bf14fb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1199,6 +1199,8 @@ enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb) case IPPROTO_UDP: if (adapter->vxlan_port == udp_hdr(skb)->dest) tnl_type = TX_TNL_TYPE_VXLAN; + else if (adapter->geneve_port == udp_hdr(skb)->dest) + tnl_type = TX_TNL_TYPE_GENEVE; break; default: return tnl_type; @@ -1238,6 +1240,7 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb, switch (tnl_type) { case TX_TNL_TYPE_VXLAN: + case TX_TNL_TYPE_GENEVE: tnl_lso->UdpLenSetOut_to_TnlHdrLen = htons(CPL_TX_TNL_LSO_UDPCHKCLROUT_F | CPL_TX_TNL_LSO_UDPLENSETOUT_F); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 6d76851a4da9..af27d2b0f79f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -317,9 +317,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, * wait [for a while] till we're at the front [or bail out with an * EBUSY] ... */ - spin_lock(&adap->mbox_lock); + spin_lock_bh(&adap->mbox_lock); list_add_tail(&entry.list, &adap->mlist.list); - spin_unlock(&adap->mbox_lock); + spin_unlock_bh(&adap->mbox_lock); delay_idx = 0; ms = delay[0]; @@ -332,9 +332,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, */ pcie_fw = t4_read_reg(adap, PCIE_FW_A); if (i > FW_CMD_MAX_TIMEOUT || (pcie_fw & PCIE_FW_ERR_F)) { - spin_lock(&adap->mbox_lock); + spin_lock_bh(&adap->mbox_lock); list_del(&entry.list); - spin_unlock(&adap->mbox_lock); + spin_unlock_bh(&adap->mbox_lock); ret = (pcie_fw & PCIE_FW_ERR_F) ? -ENXIO : -EBUSY; t4_record_mbox(adap, cmd, size, access, ret); return ret; @@ -365,9 +365,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++) v = MBOWNER_G(t4_read_reg(adap, ctl_reg)); if (v != MBOX_OWNER_DRV) { - spin_lock(&adap->mbox_lock); + spin_lock_bh(&adap->mbox_lock); list_del(&entry.list); - spin_unlock(&adap->mbox_lock); + spin_unlock_bh(&adap->mbox_lock); ret = (v == MBOX_OWNER_FW) ? 
-EBUSY : -ETIMEDOUT; t4_record_mbox(adap, cmd, size, access, ret); return ret; @@ -418,9 +418,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, execute = i + ms; t4_record_mbox(adap, cmd_rpl, MBOX_LEN, access, execute); - spin_lock(&adap->mbox_lock); + spin_lock_bh(&adap->mbox_lock); list_del(&entry.list); - spin_unlock(&adap->mbox_lock); + spin_unlock_bh(&adap->mbox_lock); return -FW_CMD_RETVAL_G((int)res); } } @@ -430,9 +430,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n", *(const u8 *)cmd, mbox); t4_report_fw_error(adap); - spin_lock(&adap->mbox_lock); + spin_lock_bh(&adap->mbox_lock); list_del(&entry.list); - spin_unlock(&adap->mbox_lock); + spin_unlock_bh(&adap->mbox_lock); t4_fatal_err(adap); return ret; } @@ -9899,3 +9899,35 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, return ret; } + +/** + * t4_set_vlan_acl - Set a VLAN id for the specified VF + * @adapter: the adapter + * @mbox: mailbox to use for the FW command + * @vf: one of the VFs instantiated by the specified PF + * @vlan: The vlanid to be set + */ +int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf, + u16 vlan) +{ + struct fw_acl_vlan_cmd vlan_cmd; + unsigned int enable; + + enable = (vlan ? FW_ACL_VLAN_CMD_EN_F : 0); + memset(&vlan_cmd, 0, sizeof(vlan_cmd)); + vlan_cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_VLAN_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + FW_CMD_EXEC_F | + FW_ACL_VLAN_CMD_PFN_V(adap->pf) | + FW_ACL_VLAN_CMD_VFN_V(vf)); + vlan_cmd.en_to_len16 = cpu_to_be32(enable | FW_LEN16(vlan_cmd)); + /* Drop all packets that do not match vlan id */ + vlan_cmd.dropnovlan_fm = FW_ACL_VLAN_CMD_FM_F; + if (enable != 0) { + vlan_cmd.nvlan = 1; + vlan_cmd.vlanid[0] = cpu_to_be16(vlan); + } + + return t4_wr_mbox(adap, adap->mbox, &vlan_cmd, sizeof(vlan_cmd), NULL); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index d9c06d6dc7b2..a6df73398d17 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -2522,6 +2522,17 @@ #define VXLAN_V(x) ((x) << VXLAN_S) #define VXLAN_G(x) (((x) >> VXLAN_S) & VXLAN_M) +#define MPS_RX_GENEVE_TYPE_A 0x11238 + +#define GENEVE_EN_S 16 +#define GENEVE_EN_V(x) ((x) << GENEVE_EN_S) +#define GENEVE_EN_F GENEVE_EN_V(1U) + +#define GENEVE_S 0 +#define GENEVE_M 0xffffU +#define GENEVE_V(x) ((x) << GENEVE_S) +#define GENEVE_G(x) (((x) >> GENEVE_S) & GENEVE_M) + #define MPS_CLS_TCAM_Y_L_A 0xf000 #define MPS_CLS_TCAM_DATA0_A 0xf000 #define MPS_CLS_TCAM_DATA1_A 0xf004 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index f3310d5b3c4c..f88766d2401d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -2353,14 +2353,22 @@ struct fw_acl_vlan_cmd { #define FW_ACL_VLAN_CMD_VFN_S 0 #define FW_ACL_VLAN_CMD_VFN_V(x) ((x) << FW_ACL_VLAN_CMD_VFN_S) -#define FW_ACL_VLAN_CMD_EN_S 31 -#define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S) +#define FW_ACL_VLAN_CMD_EN_S 31 +#define FW_ACL_VLAN_CMD_EN_M 0x1 +#define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S) +#define FW_ACL_VLAN_CMD_EN_G(x) \ + (((x) >> FW_ACL_VLAN_CMD_EN_S) & FW_ACL_VLAN_CMD_EN_M) +#define FW_ACL_VLAN_CMD_EN_F FW_ACL_VLAN_CMD_EN_V(1U) #define FW_ACL_VLAN_CMD_DROPNOVLAN_S 7 #define FW_ACL_VLAN_CMD_DROPNOVLAN_V(x) ((x) << 
FW_ACL_VLAN_CMD_DROPNOVLAN_S) -#define FW_ACL_VLAN_CMD_FM_S 6 -#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S) +#define FW_ACL_VLAN_CMD_FM_S 6 +#define FW_ACL_VLAN_CMD_FM_M 0x1 +#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S) +#define FW_ACL_VLAN_CMD_FM_G(x) \ + (((x) >> FW_ACL_VLAN_CMD_FM_S) & FW_ACL_VLAN_CMD_FM_M) +#define FW_ACL_VLAN_CMD_FM_F FW_ACL_VLAN_CMD_FM_V(1U) /* old 16-bit port capabilities bitmap (fw_port_cap16_t) */ enum fw_port_cap { diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 08c6ddb84a04..5883f09e3804 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -92,6 +92,7 @@ struct sge_rspq; */ struct port_info { struct adapter *adapter; /* our adapter */ + u32 vlan_id; /* vlan id for VST */ u16 viid; /* virtual interface ID */ s16 xact_addr_filt; /* index of our MAC address filter */ u16 rss_size; /* size of VI's RSS table slice */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 96f69f80dc99..b7e79e64d2ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -791,6 +791,8 @@ static int cxgb4vf_open(struct net_device *dev) if (err) goto err_unwind; + pi->vlan_id = t4vf_get_vf_vlan_acl(adapter); + netif_tx_start_all_queues(dev); set_bit(pi->port_id, &adapter->open_device_map); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 129b914a434c..dfce5df7538e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1202,6 +1202,10 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) BUG_ON(qidx >= pi->nqsets); txq = &adapter->sge.ethtxq[pi->first_qset + qidx]; + if (pi->vlan_id && !skb_vlan_tag_present(skb)) + __vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q), + pi->vlan_id); + /* * Take this opportunity to reclaim any TX Descriptors whose DMA * transfers have completed. 
@@ -1570,6 +1574,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, { struct adapter *adapter = rxq->rspq.adapter; struct sge *s = &adapter->sge; + struct port_info *pi; int ret; struct sk_buff *skb; @@ -1586,8 +1591,9 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rxq->rspq.idx); + pi = netdev_priv(skb->dev); - if (pkt->vlan_ex) { + if (pkt->vlan_ex && !pi->vlan_id) { __vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q), be16_to_cpu(pkt->vlan)); rxq->stats.vlan_ex++; @@ -1620,6 +1626,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq); struct adapter *adapter = rspq->adapter; struct sge *s = &adapter->sge; + struct port_info *pi; /* * If this is a good TCP packet and we have Generic Receive Offload @@ -1644,6 +1651,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, __skb_pull(skb, s->pktshift); skb->protocol = eth_type_trans(skb, rspq->netdev); skb_record_rx_queue(skb, rspq->idx); + pi = netdev_priv(skb->dev); rxq->stats.pkts++; if (csum_ok && !pkt->err_vec && @@ -1660,9 +1668,10 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, } else skb_checksum_none_assert(skb); - if (pkt->vlan_ex) { + if (pkt->vlan_ex && !pi->vlan_id) { rxq->stats.vlan_ex++; - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(pkt->vlan)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(pkt->vlan)); } netif_receive_skb(skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 9cf9c56b0f73..712e8f0c71b4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -413,5 +413,6 @@ int t4vf_handle_fw_rpl(struct adapter *, const __be64 *); int t4vf_prep_adapter(struct adapter *); int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, unsigned int *naddr, u8 *addr); +int t4vf_get_vf_vlan_acl(struct adapter *adapter); #endif /* __T4VF_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 67aec59a14e6..798695bf8678 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -2147,3 +2147,31 @@ int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, return ret; } + +/** + * t4vf_get_vf_vlan_acl - Get the VLAN ID to be set to + * the VI of this VF. + * @adapter: The adapter + * + * Find the VLAN ID to be set to the VF's VI. The requested VLAN ID + * is from the host OS via callback in the PF driver. 
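Taken together, the PF and VF pieces above form a VST-style (VLAN switch tagging) feature: the host pins a VLAN on a VF via ndo_set_vf_vlan and t4_set_vlan_acl(), the VF reads it back with FW_ACL_VLAN_CMD when the interface opens, tags its untagged transmits, and suppresses the tag on receive so the VF itself stays VLAN-unaware. The two datapath rules in isolation; the example_* wrappers are mine, the fields are as in the patch:

/* TX: if a VST vlan is pinned and the frame carries no tag, have the
 * hardware insert one.
 */
static void example_vst_tx(struct port_info *pi, struct sk_buff *skb)
{
        if (pi->vlan_id && !skb_vlan_tag_present(skb))
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                       pi->vlan_id);
}

/* RX: only report an extracted tag to the stack when VST is off, so
 * the pinned vlan never becomes visible inside the VF.
 */
static void example_vst_rx(struct port_info *pi, struct sk_buff *skb,
                           const struct cpl_rx_pkt *pkt)
{
        if (pkt->vlan_ex && !pi->vlan_id)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                       be16_to_cpu(pkt->vlan));
}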
+ */ +int t4vf_get_vf_vlan_acl(struct adapter *adapter) +{ + struct fw_acl_vlan_cmd cmd; + int vlan = 0; + int ret = 0; + + cmd.op_to_vfn = htonl(FW_CMD_OP_V(FW_ACL_VLAN_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F); + + /* Note: Do not enable the ACL */ + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + + ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd); + + if (!ret) + vlan = be16_to_cpu(cmd.vlanid[0]); + + return vlan; +} diff --git a/drivers/net/ethernet/cortina/Kconfig b/drivers/net/ethernet/cortina/Kconfig index 0df743ea51f1..89bc4579724d 100644 --- a/drivers/net/ethernet/cortina/Kconfig +++ b/drivers/net/ethernet/cortina/Kconfig @@ -14,6 +14,7 @@ if NET_VENDOR_CORTINA config GEMINI_ETHERNET tristate "Gemini Gigabit Ethernet support" depends on OF + depends on HAS_IOMEM select PHYLIB select CRC32 ---help--- diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c6e859a27ee6..e180657a02ef 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4634,6 +4634,15 @@ int be_update_queues(struct be_adapter *adapter) be_schedule_worker(adapter); + /* + * The IF was destroyed and re-created. We need to clear + * all promiscuous flags valid for the destroyed IF. + * Without this promisc mode is not restored during + * be_open() because the driver thinks that it is + * already enabled in HW. + */ + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + if (netif_running(netdev)) status = be_open(netdev); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 5385074b3b7d..e7381f8ef89d 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -20,7 +20,8 @@ #include <linux/timecounter.h> #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ - defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) + defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ + defined(CONFIG_ARM64) /* * Just figures, Motorola would have to change the offsets for * registers in the same peripheral device on different models @@ -195,7 +196,7 @@ * Evidently, ARM SoCs have the FEC block generated in a * little endian mode so adjust endianness accordingly. */ -#if defined(CONFIG_ARM) +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) #define fec32_to_cpu le32_to_cpu #define fec16_to_cpu le16_to_cpu #define cpu_to_fec32 cpu_to_le32 diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 90aa69a08922..7a7f3a42b2aa 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -195,7 +195,8 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); * account when setting it. 
*/ #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ - defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) + defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ + defined(CONFIG_ARM64) #define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16) #else #define OPT_FRAME_SIZE 0 @@ -2109,7 +2110,8 @@ static int fec_enet_get_regs_len(struct net_device *ndev) /* List of registers that can safely be read to dump them with ethtool */ #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ - defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) + defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ + defined(CONFIG_ARM64) static u32 fec_enet_register_offset[] = { FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, @@ -3139,7 +3141,7 @@ static int fec_enet_init(struct net_device *ndev) unsigned dsize_log2 = __fls(dsize); WARN_ON(dsize != (1 << dsize_log2)); -#if defined(CONFIG_ARM) +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) fep->rx_align = 0xf; fep->tx_align = 0xf; #else diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 7892f2f0c6b5..2c2976a2dda6 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void fs_timeout(struct net_device *dev) +static void fs_timeout_work(struct work_struct *work) { - struct fs_enet_private *fep = netdev_priv(dev); + struct fs_enet_private *fep = container_of(work, struct fs_enet_private, + timeout_work); + struct net_device *dev = fep->ndev; unsigned long flags; int wake = 0; @@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev) phy_stop(dev->phydev); (*fep->ops->stop)(dev); (*fep->ops->restart)(dev); - phy_start(dev->phydev); } phy_start(dev->phydev); @@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev) netif_wake_queue(dev); } +static void fs_timeout(struct net_device *dev) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + schedule_work(&fep->timeout_work); +} + /*----------------------------------------------------------------------------- * generic link-change handler - should be sufficient for most cases *-----------------------------------------------------------------------------*/ @@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); napi_disable(&fep->napi); + cancel_work_sync(&fep->timeout_work); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; + INIT_WORK(&fep->timeout_work, fs_timeout_work); netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 92e06b37a199..195fae6aec4a 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -125,6 +125,7 @@ struct fs_enet_private { spinlock_t lock; /* during all ops except TX pckt processing */ spinlock_t tx_lock; /* during fs_start_xmit and fs_tx */ struct 
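The fs_enet conversion above is the standard remedy for sleeping work in ndo_tx_timeout, which the watchdog invokes in atomic context: defer the restart to a work item and cancel it synchronously on close so it can never run against a half-torn-down device. The pattern in isolation, assuming a driver-private struct with a work_struct member:

#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct example_priv {
        struct net_device *ndev;
        struct work_struct timeout_work;
};

static void example_timeout_work(struct work_struct *work)
{
        struct example_priv *priv =
                container_of(work, struct example_priv, timeout_work);

        /* process context: safe to sleep, stop/restart the PHY, etc. */
}

static void example_tx_timeout(struct net_device *ndev)
{
        struct example_priv *priv = netdev_priv(ndev);

        schedule_work(&priv->timeout_work);     /* atomic context here */
}

/* probe:  INIT_WORK(&priv->timeout_work, example_timeout_work);
 * close:  cancel_work_sync(&priv->timeout_work);
 */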
fs_platform_info *fpi; + struct work_struct timeout_work; const struct fs_ops *ops; int rx_ring, tx_ring; dma_addr_t ring_mem_addr; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index ca247c2cc238..acf29633ec79 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -44,12 +44,17 @@ static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val) static u32 dsaf_read_sub(struct dsaf_device *dsaf_dev, u32 reg) { - u32 ret; - - if (dsaf_dev->sub_ctrl) - ret = dsaf_read_syscon(dsaf_dev->sub_ctrl, reg); - else + u32 ret = 0; + int err; + + if (dsaf_dev->sub_ctrl) { + err = dsaf_read_syscon(dsaf_dev->sub_ctrl, reg, &ret); + if (err) + dev_err(dsaf_dev->dev, "dsaf_read_syscon error %d!\n", + err); + } else { ret = dsaf_read_reg(dsaf_dev->sc_base, reg); + } return ret; } @@ -188,18 +193,23 @@ static void cpld_led_reset_acpi(struct hns_mac_cb *mac_cb) static int cpld_set_led_id(struct hns_mac_cb *mac_cb, enum hnae_led_state status) { + u32 val = 0; + int ret; + if (!mac_cb->cpld_ctrl) return 0; switch (status) { case HNAE_LED_ACTIVE: - mac_cb->cpld_led_value = - dsaf_read_syscon(mac_cb->cpld_ctrl, - mac_cb->cpld_ctrl_reg); - dsaf_set_bit(mac_cb->cpld_led_value, DSAF_LED_ANCHOR_B, - CPLD_LED_ON_VALUE); + ret = dsaf_read_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg, + &val); + if (ret) + return ret; + + dsaf_set_bit(val, DSAF_LED_ANCHOR_B, CPLD_LED_ON_VALUE); dsaf_write_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg, - mac_cb->cpld_led_value); + val); + mac_cb->cpld_led_value = val; break; case HNAE_LED_INACTIVE: dsaf_set_bit(mac_cb->cpld_led_value, DSAF_LED_ANCHOR_B, @@ -560,12 +570,19 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb) int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt) { + u32 val = 0; + int ret; + if (!mac_cb->cpld_ctrl) return -ENODEV; - *sfp_prsnt = !dsaf_read_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg - + MAC_SFP_PORT_OFFSET); + ret = dsaf_read_syscon(mac_cb->cpld_ctrl, + mac_cb->cpld_ctrl_reg + MAC_SFP_PORT_OFFSET, + &val); + if (ret) + return ret; + *sfp_prsnt = !val; return 0; } @@ -615,7 +632,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) #define RX_CSR(lane, reg) ((0x4080 + (reg) * 0x0002 + (lane) * 0x0200) * 2) u64 reg_offset = RX_CSR(lane_id[mac_cb->mac_id], 0); - int sfp_prsnt; + int sfp_prsnt = 0; int ret = hns_mac_get_sfp_prsnt(mac_cb, &sfp_prsnt); if (!mac_cb->phy_dev) { @@ -627,7 +644,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) } if (mac_cb->serdes_ctrl) { - u32 origin; + u32 origin = 0; if (!AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver)) { #define HILINK_ACCESS_SEL_CFG 0x40008 @@ -644,7 +661,10 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) HILINK_ACCESS_SEL_CFG, 3); } - origin = dsaf_read_syscon(mac_cb->serdes_ctrl, reg_offset); + ret = dsaf_read_syscon(mac_cb->serdes_ctrl, reg_offset, + &origin); + if (ret) + return ret; dsaf_set_field(origin, 1ull << 10, 10, en); dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 46a52d9bb196..886cbbf25761 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -1034,12 +1034,9 @@ static inline void dsaf_write_syscon(struct regmap *base, u32 reg, 
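The dsaf_read_syscon() rework above reflects the regmap contract: regmap_read() can fail (a syscon may sit behind a bus), so a helper that returned the value directly had nowhere to report errors. Callers therefore pre-initialize the output and check the return, as in this condensed read-modify-write sketch:

#include <linux/regmap.h>

static int example_syscon_rmw(struct regmap *map, u32 reg, u32 set_bits)
{
        unsigned int val = 0;   /* stays defined even if the read fails */
        int ret;

        ret = regmap_read(map, reg, &val);
        if (ret)
                return ret;     /* propagate instead of using garbage */

        return regmap_write(map, reg, val | set_bits);
}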
u32 value) regmap_write(base, reg, value); } -static inline u32 dsaf_read_syscon(struct regmap *base, u32 reg) +static inline int dsaf_read_syscon(struct regmap *base, u32 reg, u32 *val) { - unsigned int val; - - regmap_read(base, reg, &val); - return val; + return regmap_read(base, reg, val); } #define dsaf_read_dev(a, reg) \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 634e9327968b..fd06bc78c58e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -356,7 +356,8 @@ struct hnae3_ae_ops { u32 stringset, u8 *data); int (*get_sset_count)(struct hnae3_handle *handle, int stringset); - void (*get_regs)(struct hnae3_handle *handle, void *data); + void (*get_regs)(struct hnae3_handle *handle, u32 *version, + void *data); int (*get_regs_len)(struct hnae3_handle *handle); u32 (*get_rss_key_size)(struct hnae3_handle *handle); @@ -404,6 +405,8 @@ struct hnae3_ae_ops { int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num); void (*get_flowctrl_adv)(struct hnae3_handle *handle, u32 *flowctrl_adv); + int (*set_led_id)(struct hnae3_handle *handle, + enum ethtool_phys_id_state status); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ac848163ccae..601b6295d3f8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3318,8 +3318,8 @@ static int hns3_reset_notify(struct hnae3_handle *handle, switch (type) { case HNAE3_UP_CLIENT: - ret = hns3_reset_notify_up_enet(handle); - break; + ret = hns3_reset_notify_up_enet(handle); + break; case HNAE3_DOWN_CLIENT: ret = hns3_reset_notify_down_enet(handle); break; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 358f78036941..741020534b16 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1063,6 +1063,38 @@ static int hns3_set_coalesce(struct net_device *netdev, return 0; } +static int hns3_get_regs_len(struct net_device *netdev) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo->ops->get_regs_len) + return -EOPNOTSUPP; + + return h->ae_algo->ops->get_regs_len(h); +} + +static void hns3_get_regs(struct net_device *netdev, + struct ethtool_regs *cmd, void *data) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo->ops->get_regs) + return; + + h->ae_algo->ops->get_regs(h, &cmd->version, data); +} + +static int hns3_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_led_id) + return -EOPNOTSUPP; + + return h->ae_algo->ops->set_led_id(h, state); +} + static const struct ethtool_ops hns3vf_ethtool_ops = { .get_drvinfo = hns3_get_drvinfo, .get_ringparam = hns3_get_ringparam, @@ -1103,6 +1135,9 @@ static const struct ethtool_ops hns3_ethtool_ops = { .set_channels = hns3_set_channels, .get_coalesce = hns3_get_coalesce, .set_coalesce = hns3_set_coalesce, + .get_regs_len = hns3_get_regs_len, + .get_regs = hns3_get_regs, + .set_phys_id = hns3_set_phys_id, }; void hns3_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 
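Some context for the new .set_phys_id hook: the ethtool core drives it as a small state machine. It calls the hook once with ETHTOOL_ID_ACTIVE, where returning 0 means the driver blinks the LED by itself, while a positive value N asks the core to toggle the LED for the driver by calling back with ETHTOOL_ID_ON/ETHTOOL_ID_OFF at N blink cycles per second; a final ETHTOOL_ID_INACTIVE restores normal behaviour. The hns3 wrapper above just forwards that state to ae_ops->set_led_id; a generic driver-side skeleton:

static int example_set_phys_id(struct net_device *ndev,
                               enum ethtool_phys_id_state state)
{
        switch (state) {
        case ETHTOOL_ID_ACTIVE:
                return 2;       /* have the core blink us at 2 Hz */
        case ETHTOOL_ID_ON:
                /* drive the locate LED on */
                break;
        case ETHTOOL_ID_OFF:
                /* drive the locate LED off */
                break;
        case ETHTOOL_ID_INACTIVE:
                /* restore normal LED behaviour */
                break;
        }
        return 0;
}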
3c3159b2d3bf..3fd10a6bec53 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -102,6 +102,10 @@ enum hclge_opcode_type { HCLGE_OPC_STATS_64_BIT = 0x0030, HCLGE_OPC_STATS_32_BIT = 0x0031, HCLGE_OPC_STATS_MAC = 0x0032, + + HCLGE_OPC_QUERY_REG_NUM = 0x0040, + HCLGE_OPC_QUERY_32_BIT_REG = 0x0041, + HCLGE_OPC_QUERY_64_BIT_REG = 0x0042, /* Device management command */ /* MAC command */ @@ -111,6 +115,7 @@ enum hclge_opcode_type { HCLGE_OPC_QUERY_LINK_STATUS = 0x0307, HCLGE_OPC_CONFIG_MAX_FRM_SIZE = 0x0308, HCLGE_OPC_CONFIG_SPEED_DUP = 0x0309, + HCLGE_OPC_STATS_MAC_TRAFFIC = 0x0314, /* MACSEC command */ /* PFC/Pause CMD*/ @@ -223,6 +228,9 @@ enum hclge_opcode_type { /* Mailbox cmd */ HCLGEVF_OPC_MBX_PF_TO_VF = 0x2000, + + /* Led command */ + HCLGE_OPC_LED_STATUS_CFG = 0xB000, }; #define HCLGE_TQP_REG_OFFSET 0x80000 @@ -601,6 +609,28 @@ struct hclge_mac_vlan_mask_entry_cmd { u8 rsv2[14]; }; +#define HCLGE_MAC_MGR_MASK_VLAN_B BIT(0) +#define HCLGE_MAC_MGR_MASK_MAC_B BIT(1) +#define HCLGE_MAC_MGR_MASK_ETHERTYPE_B BIT(2) +#define HCLGE_MAC_ETHERTYPE_LLDP 0x88cc + +struct hclge_mac_mgr_tbl_entry_cmd { + u8 flags; + u8 resp_code; + __le16 vlan_tag; + __le32 mac_addr_hi32; + __le16 mac_addr_lo16; + __le16 rsv1; + __le16 ethter_type; + __le16 egress_port; + __le16 egress_queue; + u8 sw_port_id_aware; + u8 rsv2; + u8 i_port_bitmap; + u8 i_port_direction; + u8 rsv3[2]; +}; + #define HCLGE_CFG_MTA_MAC_SEL_S 0x0 #define HCLGE_CFG_MTA_MAC_SEL_M GENMASK(1, 0) #define HCLGE_CFG_MTA_MAC_EN_B 0x7 @@ -781,6 +811,23 @@ struct hclge_reset_cmd { #define HCLGE_NIC_CMQ_DESC_NUM 1024 #define HCLGE_NIC_CMQ_DESC_NUM_S 3 +#define HCLGE_LED_PORT_SPEED_STATE_S 0 +#define HCLGE_LED_PORT_SPEED_STATE_M GENMASK(5, 0) +#define HCLGE_LED_ACTIVITY_STATE_S 0 +#define HCLGE_LED_ACTIVITY_STATE_M GENMASK(1, 0) +#define HCLGE_LED_LINK_STATE_S 0 +#define HCLGE_LED_LINK_STATE_M GENMASK(1, 0) +#define HCLGE_LED_LOCATE_STATE_S 0 +#define HCLGE_LED_LOCATE_STATE_M GENMASK(1, 0) + +struct hclge_set_led_state_cmd { + u8 port_speed_led_config; + u8 link_led_config; + u8 activity_led_config; + u8 locate_led_config; + u8 rsv[20]; +}; + int hclge_cmd_init(struct hclge_dev *hdev); static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 27f0ab695f5a..32bc6f68e297 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -39,6 +39,7 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu); static int hclge_init_vlan_config(struct hclge_dev *hdev); static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); +static int hclge_update_led_status(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; @@ -392,6 +393,16 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { HCLGE_MAC_STATS_FIELD_OFF(mac_rx_send_app_bad_pkt_num)} }; +static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = { + { + .flags = HCLGE_MAC_MGR_MASK_VLAN_B, + .ethter_type = cpu_to_le16(HCLGE_MAC_ETHERTYPE_LLDP), + .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)), + .mac_addr_lo16 = cpu_to_le16(htons(0x000E)), + .i_port_bitmap = 0x1, + }, +}; + static int hclge_64_bit_update_stats(struct hclge_dev *hdev) { #define HCLGE_64_BIT_CMD_NUM 5 @@ -495,6 +506,38 @@ static int 
hclge_32_bit_update_stats(struct hclge_dev *hdev) return 0; } +static int hclge_mac_get_traffic_stats(struct hclge_dev *hdev) +{ + struct hclge_mac_stats *mac_stats = &hdev->hw_stats.mac_stats; + struct hclge_desc desc; + __le64 *desc_data; + int ret; + + /* for fiber port, need to query the total rx/tx packets statstics, + * used for data transferring checking. + */ + if (hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER) + return 0; + + if (test_bit(HCLGE_STATE_STATISTICS_UPDATING, &hdev->state)) + return 0; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_STATS_MAC_TRAFFIC, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get MAC total pkt stats fail, ret = %d\n", ret); + + return ret; + } + + desc_data = (__le64 *)(&desc.data[0]); + mac_stats->mac_tx_total_pkt_num += le64_to_cpu(*desc_data++); + mac_stats->mac_rx_total_pkt_num += le64_to_cpu(*desc_data); + + return 0; +} + static int hclge_mac_update_stats(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 @@ -2836,13 +2879,20 @@ static void hclge_service_task(struct work_struct *work) struct hclge_dev *hdev = container_of(work, struct hclge_dev, service_task); + /* The total rx/tx packets statstics are wanted to be updated + * per second. Both hclge_update_stats_for_all() and + * hclge_mac_get_traffic_stats() can do it. + */ if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) { hclge_update_stats_for_all(hdev); hdev->hw_stats.stats_timer = 0; + } else { + hclge_mac_get_traffic_stats(hdev); } hclge_update_speed_duplex(hdev); hclge_update_link_status(hdev); + hclge_update_led_status(hdev); hclge_service_complete(hdev); } @@ -4249,6 +4299,91 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, return status; } +static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, + u16 cmdq_resp, u8 resp_code) +{ +#define HCLGE_ETHERTYPE_SUCCESS_ADD 0 +#define HCLGE_ETHERTYPE_ALREADY_ADD 1 +#define HCLGE_ETHERTYPE_MGR_TBL_OVERFLOW 2 +#define HCLGE_ETHERTYPE_KEY_CONFLICT 3 + + int return_status; + + if (cmdq_resp) { + dev_err(&hdev->pdev->dev, + "cmdq execute failed for get_mac_ethertype_cmd_status, status=%d.\n", + cmdq_resp); + return -EIO; + } + + switch (resp_code) { + case HCLGE_ETHERTYPE_SUCCESS_ADD: + case HCLGE_ETHERTYPE_ALREADY_ADD: + return_status = 0; + break; + case HCLGE_ETHERTYPE_MGR_TBL_OVERFLOW: + dev_err(&hdev->pdev->dev, + "add mac ethertype failed for manager table overflow.\n"); + return_status = -EIO; + break; + case HCLGE_ETHERTYPE_KEY_CONFLICT: + dev_err(&hdev->pdev->dev, + "add mac ethertype failed for key conflict.\n"); + return_status = -EIO; + break; + default: + dev_err(&hdev->pdev->dev, + "add mac ethertype failed for undefined, code=%d.\n", + resp_code); + return_status = -EIO; + } + + return return_status; +} + +static int hclge_add_mgr_tbl(struct hclge_dev *hdev, + const struct hclge_mac_mgr_tbl_entry_cmd *req) +{ + struct hclge_desc desc; + u8 resp_code; + u16 retval; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_ETHTYPE_ADD, false); + memcpy(desc.data, req, sizeof(struct hclge_mac_mgr_tbl_entry_cmd)); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "add mac ethertype failed for cmd_send, ret =%d.\n", + ret); + return ret; + } + + resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc.retval); + + return hclge_get_mac_ethertype_cmd_status(hdev, retval, resp_code); +} + +static int init_mgr_tbl(struct hclge_dev *hdev) +{ + int ret; + int i; + + for (i = 0; i < 
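
In hclge_add_mgr_tbl() above, the firmware response code rides in byte 1 of the first returned data word, and hclge_get_mac_ethertype_cmd_status() deliberately treats "already added" the same as "added" so re-running table init stays idempotent. A reduced model of that mapping (codes taken from the hunk, errno values illustrative):

        #include <errno.h>
        #include <stdint.h>
        #include <stdio.h>

        static int mgr_tbl_status(uint32_t data0)
        {
                uint8_t resp = (data0 >> 8) & 0xff; /* byte 1 of desc.data[0] */

                switch (resp) {
                case 0: /* HCLGE_ETHERTYPE_SUCCESS_ADD */
                case 1: /* HCLGE_ETHERTYPE_ALREADY_ADD: idempotent success */
                        return 0;
                case 2: /* manager table overflow */
                case 3: /* key conflict */
                default:
                        return -EIO;
                }
        }

        int main(void)
        {
                printf("%d %d\n", mgr_tbl_status(0x0100), mgr_tbl_status(0x0200));
                return 0;
        }
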
ARRAY_SIZE(hclge_mgr_table); i++) { + ret = hclge_add_mgr_tbl(hdev, &hclge_mgr_table[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "add mac ethertype failed, ret =%d.\n", + ret); + return ret; + } + } + + return 0; +} + static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -5271,6 +5406,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = init_mgr_tbl(hdev); + if (ret) { + dev_err(&pdev->dev, "manager table init fail, ret =%d\n", ret); + return ret; + } + hclge_dcb_ops_set(hdev); timer_setup(&hdev->service_timer, hclge_service_timer, 0); @@ -5544,6 +5685,318 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) return ret; } +static int hclge_get_regs_num(struct hclge_dev *hdev, u32 *regs_num_32_bit, + u32 *regs_num_64_bit) +{ + struct hclge_desc desc; + u32 total_num; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_REG_NUM, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Query register number cmd failed, ret = %d.\n", ret); + return ret; + } + + *regs_num_32_bit = le32_to_cpu(desc.data[0]); + *regs_num_64_bit = le32_to_cpu(desc.data[1]); + + total_num = *regs_num_32_bit + *regs_num_64_bit; + if (!total_num) + return -EINVAL; + + return 0; +} + +static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, + void *data) +{ +#define HCLGE_32_BIT_REG_RTN_DATANUM 8 + + struct hclge_desc *desc; + u32 *reg_val = data; + __le32 *desc_data; + int cmd_num; + int i, k, n; + int ret; + + if (regs_num == 0) + return 0; + + cmd_num = DIV_ROUND_UP(regs_num + 2, HCLGE_32_BIT_REG_RTN_DATANUM); + desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_32_BIT_REG, true); + ret = hclge_cmd_send(&hdev->hw, desc, cmd_num); + if (ret) { + dev_err(&hdev->pdev->dev, + "Query 32 bit register cmd failed, ret = %d.\n", ret); + kfree(desc); + return ret; + } + + for (i = 0; i < cmd_num; i++) { + if (i == 0) { + desc_data = (__le32 *)(&desc[i].data[0]); + n = HCLGE_32_BIT_REG_RTN_DATANUM - 2; + } else { + desc_data = (__le32 *)(&desc[i]); + n = HCLGE_32_BIT_REG_RTN_DATANUM; + } + for (k = 0; k < n; k++) { + *reg_val++ = le32_to_cpu(*desc_data++); + + regs_num--; + if (!regs_num) + break; + } + } + + kfree(desc); + return 0; +} + +static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, + void *data) +{ +#define HCLGE_64_BIT_REG_RTN_DATANUM 4 + + struct hclge_desc *desc; + u64 *reg_val = data; + __le64 *desc_data; + int cmd_num; + int i, k, n; + int ret; + + if (regs_num == 0) + return 0; + + cmd_num = DIV_ROUND_UP(regs_num + 1, HCLGE_64_BIT_REG_RTN_DATANUM); + desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_64_BIT_REG, true); + ret = hclge_cmd_send(&hdev->hw, desc, cmd_num); + if (ret) { + dev_err(&hdev->pdev->dev, + "Query 64 bit register cmd failed, ret = %d.\n", ret); + kfree(desc); + return ret; + } + + for (i = 0; i < cmd_num; i++) { + if (i == 0) { + desc_data = (__le64 *)(&desc[i].data[0]); + n = HCLGE_64_BIT_REG_RTN_DATANUM - 1; + } else { + desc_data = (__le64 *)(&desc[i]); + n = HCLGE_64_BIT_REG_RTN_DATANUM; + } + for (k = 0; k < n; k++) { + *reg_val++ = le64_to_cpu(*desc_data++); + + regs_num--; + if (!regs_num) + break; + } + } + + kfree(desc); + return 0; +} + +static int 
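
The descriptor math in hclge_get_32_bit_regs() above assumes each command descriptor returns 8 words but the first one spends 2 of them on metadata, which is where the "+ 2" inside the DIV_ROUND_UP() comes from (the 64-bit variant reserves 1 of 4 slots, hence its "+ 1"). Back-of-envelope check, standalone:

        #include <stdio.h>

        #define DATANUM 8 /* HCLGE_32_BIT_REG_RTN_DATANUM */
        #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

        int main(void)
        {
                unsigned int regs_num = 100;
                unsigned int cmd_num = DIV_ROUND_UP(regs_num + 2, DATANUM);
                unsigned int capacity = cmd_num * DATANUM - 2;

                /* 13 descriptors carry up to 102 registers, enough for 100 */
                printf("%u descriptors, capacity %u\n", cmd_num, capacity);
                return 0;
        }
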
hclge_get_regs_len(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 regs_num_32_bit, regs_num_64_bit; + int ret; + + ret = hclge_get_regs_num(hdev, ®s_num_32_bit, ®s_num_64_bit); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get register number failed, ret = %d.\n", ret); + return -EOPNOTSUPP; + } + + return regs_num_32_bit * sizeof(u32) + regs_num_64_bit * sizeof(u64); +} + +static void hclge_get_regs(struct hnae3_handle *handle, u32 *version, + void *data) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 regs_num_32_bit, regs_num_64_bit; + int ret; + + *version = hdev->fw_version; + + ret = hclge_get_regs_num(hdev, ®s_num_32_bit, ®s_num_64_bit); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get register number failed, ret = %d.\n", ret); + return; + } + + ret = hclge_get_32_bit_regs(hdev, regs_num_32_bit, data); + if (ret) { + dev_err(&hdev->pdev->dev, + "Get 32 bit register failed, ret = %d.\n", ret); + return; + } + + data = (u32 *)data + regs_num_32_bit; + ret = hclge_get_64_bit_regs(hdev, regs_num_64_bit, + data); + if (ret) + dev_err(&hdev->pdev->dev, + "Get 64 bit register failed, ret = %d.\n", ret); +} + +static int hclge_set_led_status_sfp(struct hclge_dev *hdev, u8 speed_led_status, + u8 act_led_status, u8 link_led_status, + u8 locate_led_status) +{ + struct hclge_set_led_state_cmd *req; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_LED_STATUS_CFG, false); + + req = (struct hclge_set_led_state_cmd *)desc.data; + hnae_set_field(req->port_speed_led_config, HCLGE_LED_PORT_SPEED_STATE_M, + HCLGE_LED_PORT_SPEED_STATE_S, speed_led_status); + hnae_set_field(req->link_led_config, HCLGE_LED_ACTIVITY_STATE_M, + HCLGE_LED_ACTIVITY_STATE_S, act_led_status); + hnae_set_field(req->activity_led_config, HCLGE_LED_LINK_STATE_M, + HCLGE_LED_LINK_STATE_S, link_led_status); + hnae_set_field(req->locate_led_config, HCLGE_LED_LOCATE_STATE_M, + HCLGE_LED_LOCATE_STATE_S, locate_led_status); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "Send set led state cmd error, ret =%d\n", ret); + + return ret; +} + +enum hclge_led_status { + HCLGE_LED_OFF, + HCLGE_LED_ON, + HCLGE_LED_NO_CHANGE = 0xFF, +}; + +static int hclge_set_led_id(struct hnae3_handle *handle, + enum ethtool_phys_id_state status) +{ +#define BLINK_FREQUENCY 2 + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct phy_device *phydev = hdev->hw.mac.phydev; + int ret = 0; + + if (phydev || hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER) + return -EOPNOTSUPP; + + switch (status) { + case ETHTOOL_ID_ACTIVE: + ret = hclge_set_led_status_sfp(hdev, + HCLGE_LED_NO_CHANGE, + HCLGE_LED_NO_CHANGE, + HCLGE_LED_NO_CHANGE, + HCLGE_LED_ON); + break; + case ETHTOOL_ID_INACTIVE: + ret = hclge_set_led_status_sfp(hdev, + HCLGE_LED_NO_CHANGE, + HCLGE_LED_NO_CHANGE, + HCLGE_LED_NO_CHANGE, + HCLGE_LED_OFF); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +enum hclge_led_port_speed { + HCLGE_SPEED_LED_FOR_1G, + HCLGE_SPEED_LED_FOR_10G, + HCLGE_SPEED_LED_FOR_25G, + HCLGE_SPEED_LED_FOR_40G, + HCLGE_SPEED_LED_FOR_50G, + HCLGE_SPEED_LED_FOR_100G, +}; + +static u8 hclge_led_get_speed_status(u32 speed) +{ + u8 speed_led; + + switch (speed) { + case HCLGE_MAC_SPEED_1G: + speed_led = HCLGE_SPEED_LED_FOR_1G; + break; + case HCLGE_MAC_SPEED_10G: + speed_led = HCLGE_SPEED_LED_FOR_10G; + 
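
hclge_set_led_status_sfp() above packs each LED state into its config byte with hnae_set_field(), a conventional mask-and-shift update. A sketch in the same style (the real macro lives in the hns3 headers and operates on an lvalue rather than a pointer; this standalone version is illustrative only):

        #include <stdint.h>
        #include <stdio.h>

        #define GENMASK8(h, l) ((uint8_t)(((1u << ((h) - (l) + 1)) - 1) << (l)))

        static void set_field(uint8_t *origin, uint8_t mask,
                              unsigned int shift, uint8_t val)
        {
                *origin &= ~mask;                      /* clear the field */
                *origin |= (uint8_t)((val << shift) & mask); /* write it */
        }

        int main(void)
        {
                uint8_t locate_led_config = 0;

                /* HCLGE_LED_LOCATE_STATE_M is GENMASK(1, 0) at shift 0 */
                set_field(&locate_led_config, GENMASK8(1, 0), 0, 1 /* LED on */);
                printf("0x%02x\n", locate_led_config);
                return 0;
        }
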
break; + case HCLGE_MAC_SPEED_25G: + speed_led = HCLGE_SPEED_LED_FOR_25G; + break; + case HCLGE_MAC_SPEED_40G: + speed_led = HCLGE_SPEED_LED_FOR_40G; + break; + case HCLGE_MAC_SPEED_50G: + speed_led = HCLGE_SPEED_LED_FOR_50G; + break; + case HCLGE_MAC_SPEED_100G: + speed_led = HCLGE_SPEED_LED_FOR_100G; + break; + default: + speed_led = HCLGE_LED_NO_CHANGE; + } + + return speed_led; +} + +static int hclge_update_led_status(struct hclge_dev *hdev) +{ + u8 port_speed_status, link_status, activity_status; + u64 rx_pkts, tx_pkts; + + if (hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER) + return 0; + + port_speed_status = hclge_led_get_speed_status(hdev->hw.mac.speed); + + rx_pkts = hdev->hw_stats.mac_stats.mac_rx_total_pkt_num; + tx_pkts = hdev->hw_stats.mac_stats.mac_tx_total_pkt_num; + if (rx_pkts != hdev->rx_pkts_for_led || + tx_pkts != hdev->tx_pkts_for_led) + activity_status = HCLGE_LED_ON; + else + activity_status = HCLGE_LED_OFF; + hdev->rx_pkts_for_led = rx_pkts; + hdev->tx_pkts_for_led = tx_pkts; + + if (hdev->hw.mac.link) + link_status = HCLGE_LED_ON; + else + link_status = HCLGE_LED_OFF; + + return hclge_set_led_status_sfp(hdev, port_speed_status, + activity_status, link_status, + HCLGE_LED_NO_CHANGE); +} + static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, @@ -5595,6 +6048,9 @@ static const struct hnae3_ae_ops hclge_ops = { .set_channels = hclge_set_channels, .get_channels = hclge_get_channels, .get_flowctrl_adv = hclge_get_flowctrl_adv, + .get_regs_len = hclge_get_regs_len, + .get_regs = hclge_get_regs, + .set_led_id = hclge_set_led_id, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index eeb6c8d66e4e..d99a76a9557c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -550,6 +550,9 @@ struct hclge_dev { bool accept_mta_mc; /* Whether accept mta filter multicast */ struct hclge_vlan_type_cfg vlan_type_cfg; + + u64 rx_pkts_for_led; + u64 tx_pkts_for_led; }; /* VPort level vlan tag configuration for TX direction */ diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 71ddad13baf4..354c0982847b 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -494,6 +494,9 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s case 16384: ret |= EMAC_MR1_RFS_16K; break; + case 8192: + ret |= EMAC4_MR1_RFS_8K; + break; case 4096: ret |= EMAC_MR1_RFS_4K; break; @@ -516,6 +519,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ case 16384: ret |= EMAC4_MR1_TFS_16K; break; + case 8192: + ret |= EMAC4_MR1_TFS_8K; + break; case 4096: ret |= EMAC4_MR1_TFS_4K; break; diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h index bc14dcf27b6b..e2f80cca9bed 100644 --- a/drivers/net/ethernet/ibm/emac/emac.h +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -138,9 +138,11 @@ struct emac_regs { #define EMAC4_MR1_RFS_2K 0x00100000 #define EMAC4_MR1_RFS_4K 0x00180000 +#define EMAC4_MR1_RFS_8K 0x00200000 #define EMAC4_MR1_RFS_16K 0x00280000 #define EMAC4_MR1_TFS_2K 0x00020000 #define EMAC4_MR1_TFS_4K 0x00030000 +#define EMAC4_MR1_TFS_8K 0x00040000 #define EMAC4_MR1_TFS_16K 0x00050000 #define EMAC4_MR1_TR 0x00008000 #define EMAC4_MR1_MWSW_001 0x00001000 @@ -229,7 +231,7 @@ 
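
hclge_update_led_status() above drives the activity LED from a simple delta test: if the MAC rx/tx totals moved since the previous service tick, the LED is ON for this interval, and the snapshots (rx_pkts_for_led/tx_pkts_for_led) are refreshed either way. A userspace model of that heuristic, with simplified types:

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        struct led_ctx {
                uint64_t rx_seen, tx_seen; /* snapshots from last tick */
        };

        static bool activity(struct led_ctx *c, uint64_t rx, uint64_t tx)
        {
                bool on = rx != c->rx_seen || tx != c->tx_seen;

                c->rx_seen = rx;
                c->tx_seen = tx;
                return on;
        }

        int main(void)
        {
                struct led_ctx c = { 0, 0 };

                /* traffic seen on first tick, none on the second */
                printf("%d %d\n", activity(&c, 10, 3), activity(&c, 10, 3));
                return 0;
        }
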
struct emac_regs { #define EMAC_STACR_PHYE 0x00004000 #define EMAC_STACR_STAC_MASK 0x00003000 #define EMAC_STACR_STAC_READ 0x00001000 -#define EMAC_STACR_STAC_WRITE 0x00002000 +#define EMAC_STACR_STAC_WRITE 0x00000800 #define EMAC_STACR_OPBC_MASK 0x00000C00 #define EMAC_STACR_OPBC_50 0x00000000 #define EMAC_STACR_OPBC_66 0x00000400 diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 736df59c16f5..8f2a77ecf4fb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -411,6 +411,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) struct ibmvnic_rx_pool *rx_pool; int rx_scrqs; int i, j, rc; + u64 *size_array; + + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); for (i = 0; i < rx_scrqs; i++) { @@ -418,7 +422,17 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i); - rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff); + if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { + free_long_term_buff(adapter, &rx_pool->long_term_buff); + rx_pool->buff_size = be64_to_cpu(size_array[i]); + alloc_long_term_buff(adapter, &rx_pool->long_term_buff, + rx_pool->size * + rx_pool->buff_size); + } else { + rc = reset_long_term_buff(adapter, + &rx_pool->long_term_buff); + } + if (rc) return rc; @@ -440,14 +454,12 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) static void release_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; - int rx_scrqs; int i, j; if (!adapter->rx_pool) return; - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - for (i = 0; i < rx_scrqs; i++) { + for (i = 0; i < adapter->num_active_rx_pools; i++) { rx_pool = &adapter->rx_pool[i]; netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); @@ -470,6 +482,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->rx_pool); adapter->rx_pool = NULL; + adapter->num_active_rx_pools = 0; } static int init_rx_pools(struct net_device *netdev) @@ -494,6 +507,8 @@ static int init_rx_pools(struct net_device *netdev) return -1; } + adapter->num_active_rx_pools = 0; + for (i = 0; i < rxadd_subcrqs; i++) { rx_pool = &adapter->rx_pool[i]; @@ -537,6 +552,8 @@ static int init_rx_pools(struct net_device *netdev) rx_pool->next_free = 0; } + adapter->num_active_rx_pools = rxadd_subcrqs; + return 0; } @@ -587,13 +604,12 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter) static void release_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; - int i, tx_scrqs; + int i; if (!adapter->tx_pool) return; - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); - for (i = 0; i < tx_scrqs; i++) { + for (i = 0; i < adapter->num_active_tx_pools; i++) { netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i); tx_pool = &adapter->tx_pool[i]; kfree(tx_pool->tx_buff); @@ -604,6 +620,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->tx_pool); adapter->tx_pool = NULL; + adapter->num_active_tx_pools = 0; } static int init_tx_pools(struct net_device *netdev) @@ -620,6 +637,8 @@ static int init_tx_pools(struct net_device *netdev) if (!adapter->tx_pool) return -1; + adapter->num_active_tx_pools = 0; + for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; @@ -667,6 +686,8 @@ static int init_tx_pools(struct 
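
The ibmvnic changes above introduce num_active_rx_pools/num_active_tx_pools so the release paths count what was actually allocated instead of re-reading login_rsp_buf, which a later renegotiation may have replaced. The bookkeeping pattern in miniature (stand-in types, not the driver's):

        #include <stdlib.h>

        struct pool { void *buf; };

        struct adapter {
                struct pool *rx_pool;
                unsigned long num_active_rx_pools;
        };

        static int init_rx_pools(struct adapter *a, unsigned long n)
        {
                a->rx_pool = calloc(n, sizeof(*a->rx_pool));
                if (!a->rx_pool)
                        return -1;
                a->num_active_rx_pools = n; /* recorded only after success */
                return 0;
        }

        static void release_rx_pools(struct adapter *a)
        {
                if (!a->rx_pool)
                        return;
                /* driven by our own counter, never by the login response */
                for (unsigned long i = 0; i < a->num_active_rx_pools; i++)
                        free(a->rx_pool[i].buf);
                free(a->rx_pool);
                a->rx_pool = NULL;
                a->num_active_rx_pools = 0;
        }

        int main(void)
        {
                struct adapter a = { 0 };

                if (!init_rx_pools(&a, 4))
                        release_rx_pools(&a);
                return 0;
        }
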
net_device *netdev) tx_pool->producer_index = 0; } + adapter->num_active_tx_pools = tx_subcrqs; + return 0; } @@ -861,7 +882,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) if (adapter->vpd->buff) len = adapter->vpd->len; - reinit_completion(&adapter->fw_done); + init_completion(&adapter->fw_done); crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; crq.get_vpd_size.cmd = GET_VPD_SIZE; ibmvnic_send_crq(adapter, &crq); @@ -923,6 +944,13 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (!adapter->vpd) return -ENOMEM; + /* Vital Product Data (VPD) */ + rc = ibmvnic_get_vpd(adapter); + if (rc) { + netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); + return rc; + } + adapter->map_id = 1; adapter->napi = kcalloc(adapter->req_rx_queues, sizeof(struct napi_struct), GFP_KERNEL); @@ -996,7 +1024,7 @@ static int __ibmvnic_open(struct net_device *netdev) static int ibmvnic_open(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int rc, vpd; + int rc; mutex_lock(&adapter->reset_lock); @@ -1019,11 +1047,6 @@ static int ibmvnic_open(struct net_device *netdev) rc = __ibmvnic_open(netdev); netif_carrier_on(netdev); - /* Vital Product Data (VPD) */ - vpd = ibmvnic_get_vpd(adapter); - if (vpd) - netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); - mutex_unlock(&adapter->reset_lock); return rc; @@ -1280,6 +1303,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned char *dst; u64 *handle_array; int index = 0; + u8 proto = 0; int ret = 0; if (adapter->resetting) { @@ -1368,17 +1392,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) } if (skb->protocol == htons(ETH_P_IP)) { - if (ip_hdr(skb)->version == 4) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; - else if (ip_hdr(skb)->version == 6) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; - - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; - else if (ip_hdr(skb)->protocol != IPPROTO_TCP) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; + proto = ip_hdr(skb)->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; + proto = ipv6_hdr(skb)->nexthdr; } + if (proto == IPPROTO_TCP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; + else if (proto == IPPROTO_UDP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; + if (skb->ip_summed == CHECKSUM_PARTIAL) { tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; hdrs += 2; @@ -1551,6 +1576,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) static int do_reset(struct ibmvnic_adapter *adapter, struct ibmvnic_rwi *rwi, u32 reset_state) { + u64 old_num_rx_queues, old_num_tx_queues; struct net_device *netdev = adapter->netdev; int i, rc; @@ -1560,6 +1586,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; + old_num_rx_queues = adapter->req_rx_queues; + old_num_tx_queues = adapter->req_tx_queues; + if (rwi->reset_reason == VNIC_RESET_MOBILITY) { rc = ibmvnic_reenable_crq_queue(adapter); if (rc) @@ -1604,6 +1633,12 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = init_resources(adapter); if (rc) return rc; + } else if (adapter->req_rx_queues != old_num_rx_queues || + adapter->req_tx_queues != old_num_tx_queues) { + release_rx_pools(adapter); + release_tx_pools(adapter); + init_rx_pools(netdev); + init_tx_pools(netdev); } else { rc = reset_tx_pools(adapter); if (rc) @@ -3357,7 
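
The ibmvnic_xmit() rework above fixes the checksum-flag selection: the L4 protocol is now read from the correct header (IPv4 "protocol" vs IPv6 "nexthdr") before TCP/UDP flags are chosen, instead of the old code that flagged every non-TCP IPv4 packet as UDP. A standalone model of the corrected decision, with illustrative flag constants:

        #include <stdint.h>
        #include <stdio.h>

        #define PROT_IPV4 0x01
        #define PROT_IPV6 0x02
        #define PROT_TCP  0x04
        #define PROT_UDP  0x08

        #define ETH_P_IP    0x0800
        #define ETH_P_IPV6  0x86DD
        #define IPPROTO_TCP 6
        #define IPPROTO_UDP 17

        static uint8_t tx_flags(uint16_t ethertype, uint8_t l4proto)
        {
                uint8_t flags = 0;

                if (ethertype == ETH_P_IP)
                        flags |= PROT_IPV4;
                else if (ethertype == ETH_P_IPV6)
                        flags |= PROT_IPV6;

                /* only then map the L4 protocol; unknown protos set nothing */
                if (l4proto == IPPROTO_TCP)
                        flags |= PROT_TCP;
                else if (l4proto == IPPROTO_UDP)
                        flags |= PROT_UDP;

                return flags;
        }

        int main(void)
        {
                printf("0x%02x\n", tx_flags(ETH_P_IPV6, IPPROTO_UDP)); /* 0x0a */
                return 0;
        }
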
+3392,11 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) return; } + adapter->ip_offload_ctrl.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB); + adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum; + adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum; adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum; adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; @@ -3597,7 +3636,17 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, *req_value, (long int)be64_to_cpu(crq->request_capability_rsp. number), name); - *req_value = be64_to_cpu(crq->request_capability_rsp.number); + + if (be16_to_cpu(crq->request_capability_rsp.capability) == + REQ_MTU) { + pr_err("mtu of %llu is not supported. Reverting.\n", + *req_value); + *req_value = adapter->fallback.mtu; + } else { + *req_value = + be64_to_cpu(crq->request_capability_rsp.number); + } + ibmvnic_send_req_caps(adapter, 1); return; default: diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 2df79fdd800b..fe21a6e2ddae 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1091,6 +1091,8 @@ struct ibmvnic_adapter { u64 opt_rxba_entries_per_subcrq; __be64 tx_rx_desc_req; u8 map_id; + u64 num_active_rx_pools; + u64 num_active_tx_pools; struct tasklet_struct tasklet; enum vnic_state state; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 9f18d39bdc8f..1298b69f990b 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3303,9 +3303,11 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) if (adapter->flags & FLAG_IS_ICH) { u32 rxdctl = er32(RXDCTL(0)); - ew32(RXDCTL(0), rxdctl | 0x3); + ew32(RXDCTL(0), rxdctl | 0x3 | BIT(8)); } + dev_info(&adapter->pdev->dev, + "Some CPU C-states have been disabled in order to enable jumbo frames\n"); pm_qos_update_request(&adapter->pm_qos_req, lat); } else { pm_qos_update_request(&adapter->pm_qos_req, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index ea3ab24265ee..760cfa52d02c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -353,7 +353,7 @@ int fm10k_iov_resume(struct pci_dev *pdev) struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; /* allocate all but the last GLORT to the VFs */ - if (i == ((~hw->mac.dglort_map) >> FM10K_DGLORTMAP_MASK_SHIFT)) + if (i == (~hw->mac.dglort_map >> FM10K_DGLORTMAP_MASK_SHIFT)) break; /* assign GLORT to VF, and restrict it to multicast */ @@ -511,7 +511,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs) return err; /* allocate VFs if not already allocated */ - if (num_vfs && (num_vfs != current_vfs)) { + if (num_vfs && num_vfs != current_vfs) { /* Disable completer abort error reporting as * the VFs can trigger this any time they read a queue * that they don't own. 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index adc62fb38c49..a38ae5c54da3 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -934,8 +934,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) if (vid >= VLAN_N_VID) return -EINVAL; - /* Verify we have permission to add VLANs */ - if (hw->mac.vlan_override) + /* Verify that we have permission to add VLANs. If this is a request + * to remove a VLAN, we still want to allow the user to remove the + * VLAN device. In that case, we need to clear the bit in the + * active_vlans bitmask. + */ + if (set && hw->mac.vlan_override) return -EACCES; /* update active_vlans bitmask */ @@ -954,6 +958,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) rx_ring->vid &= ~FM10K_VLAN_CLEAR; } + /* If our VLAN has been overridden, there is no reason to send VLAN + * removal requests as they will be silently ignored. + */ + if (hw->mac.vlan_override) + return 0; + /* Do not remove default VLAN ID related entries from VLAN and MAC * tables */ @@ -1040,14 +1050,13 @@ static int __fm10k_uc_sync(struct net_device *dev, const unsigned char *addr, bool sync) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_hw *hw = &interface->hw; u16 vid, glort = interface->glort; s32 err; if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; + for (vid = fm10k_find_next_vlan(interface, 0); vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { err = fm10k_queue_mac_request(interface, glort, @@ -1106,14 +1115,13 @@ static int __fm10k_mc_sync(struct net_device *dev, const unsigned char *addr, bool sync) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_hw *hw = &interface->hw; u16 vid, glort = interface->glort; s32 err; if (!is_multicast_ether_addr(addr)) return -EADDRNOTAVAIL; - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; + for (vid = fm10k_find_next_vlan(interface, 0); vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { err = fm10k_queue_mac_request(interface, glort, @@ -1157,10 +1165,12 @@ static void fm10k_set_rx_mode(struct net_device *dev) /* update xcast mode first, but only if it changed */ if (interface->xcast_mode != xcast_mode) { - /* update VLAN table */ + /* update VLAN table when entering promiscuous mode */ if (xcast_mode == FM10K_XCAST_MODE_PROMISC) fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0, true); + + /* clear VLAN table when exiting promiscuous mode */ if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC) fm10k_clear_unused_vlans(interface); @@ -1182,9 +1192,10 @@ static void fm10k_set_rx_mode(struct net_device *dev) void fm10k_restore_rx_state(struct fm10k_intfc *interface) { + struct fm10k_l2_accel *l2_accel = interface->l2_accel; struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; - int xcast_mode; + int xcast_mode, i; u16 vid, glort; /* record glort for this interface */ @@ -1211,11 +1222,8 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0, xcast_mode == FM10K_XCAST_MODE_PROMISC); - /* Add filter for VLAN 0 */ - fm10k_queue_vlan_request(interface, 0, 0, true); - /* update table with current entries */ - for (vid = hw->mac.default_vid ? 
fm10k_find_next_vlan(interface, 0) : 1; + for (vid = fm10k_find_next_vlan(interface, 0); vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { fm10k_queue_vlan_request(interface, vid, 0, true); @@ -1234,6 +1242,24 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) __dev_uc_sync(netdev, fm10k_uc_sync, fm10k_uc_unsync); __dev_mc_sync(netdev, fm10k_mc_sync, fm10k_mc_unsync); + /* synchronize macvlan addresses */ + if (l2_accel) { + for (i = 0; i < l2_accel->size; i++) { + struct net_device *sdev = l2_accel->macvlan[i]; + + if (!sdev) + continue; + + glort = l2_accel->dglort + 1 + i; + + hw->mac.ops.update_xcast_mode(hw, glort, + FM10K_XCAST_MODE_MULTI); + fm10k_queue_mac_request(interface, glort, + sdev->dev_addr, + hw->mac.default_vid, true); + } + } + fm10k_mbx_unlock(interface); /* record updated xcast mode state */ @@ -1490,7 +1516,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_MULTI); fm10k_queue_mac_request(interface, glort, sdev->dev_addr, - 0, true); + hw->mac.default_vid, true); } fm10k_mbx_unlock(interface); @@ -1530,7 +1556,7 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv) hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_NONE); fm10k_queue_mac_request(interface, glort, sdev->dev_addr, - 0, false); + hw->mac.default_vid, false); } fm10k_mbx_unlock(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 7f605221a686..a434fecfdfeb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2463,7 +2463,6 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) return err; } -#ifdef CONFIG_PM /** * fm10k_resume - Generic PM resume hook * @dev: generic device structure @@ -2472,7 +2471,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) * suspend or hibernation. This function does not need to handle lower PCIe * device state as the stack takes care of that for us. **/ -static int fm10k_resume(struct device *dev) +static int __maybe_unused fm10k_resume(struct device *dev) { struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; @@ -2499,7 +2498,7 @@ static int fm10k_resume(struct device *dev) * system suspend or hibernation. This function does not need to handle lower * PCIe device state as the stack takes care of that for us. 
**/ -static int fm10k_suspend(struct device *dev) +static int __maybe_unused fm10k_suspend(struct device *dev) { struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; @@ -2511,8 +2510,6 @@ static int fm10k_suspend(struct device *dev) return 0; } -#endif /* CONFIG_PM */ - /** * fm10k_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device @@ -2643,11 +2640,9 @@ static struct pci_driver fm10k_driver = { .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, -#ifdef CONFIG_PM .driver = { .pm = &fm10k_pm_ops, }, -#endif /* CONFIG_PM */ .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 425d814aed4d..d6406fc31ffb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -866,7 +866,7 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, /* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is * used here to indicate to the VF that it will not have privilege to * write VLAN_TABLE. All policy is enforced on the PF but this allows - * the VF to correctly report errors to userspace rqeuests. + * the VF to correctly report errors to userspace requests. */ if (vf_info->pf_vid) vf_vid = vf_info->pf_vid | FM10K_VLAN_OVERRIDE; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 9af74253c3f7..d9670cd8743f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -1027,7 +1027,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, hw->aq.arq.next_to_clean = ntc; hw->aq.arq.next_to_use = ntu; - i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode)); + i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode), &e->desc); clean_arq_element_out: /* Set pending if needed, unlock and return */ if (pending) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index c5776340517c..0d471b0db0f4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -2231,8 +2231,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); */ struct i40e_aqc_nvm_update { u8 command_flags; -#define I40E_AQ_NVM_LAST_CMD 0x01 -#define I40E_AQ_NVM_FLASH_ONLY 0x80 +#define I40E_AQ_NVM_LAST_CMD 0x01 +#define I40E_AQ_NVM_FLASH_ONLY 0x80 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT 1 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK 0x03 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED 0x03 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL 0x01 u8 module_pointer; __le16 length; __le32 offset; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 1b1e2acbd07f..0de9610c1d8d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -378,11 +378,11 @@ void i40e_client_subtask(struct i40e_pf *pf) if (!client || !cdev) return; - /* Here we handle client opens. If the client is down, but - * the netdev is up, then open the client. + /* Here we handle client opens. If the client is down, and + * the netdev is registered, then open the client. 
*/ if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { - if (!test_bit(__I40E_VSI_DOWN, vsi->state) && + if (vsi->netdev_registered && client->ops && client->ops->open) { set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); ret = client->ops->open(&cdev->lan_info, client); @@ -393,17 +393,19 @@ void i40e_client_subtask(struct i40e_pf *pf) i40e_client_del_instance(pf); } } - } else { - /* Likewise for client close. If the client is up, but the netdev - * is down, then close the client. - */ - if (test_bit(__I40E_VSI_DOWN, vsi->state) && - client->ops && client->ops->close) { - clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); - client->ops->close(&cdev->lan_info, client, false); - i40e_client_release_qvlist(&cdev->lan_info); - } } + + /* enable/disable PE TCP_ENA flag based on netdev down/up + */ + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + i40e_client_update_vsi_ctxt(&cdev->lan_info, client, + 0, 0, 0, + I40E_CLIENT_VSI_FLAG_TCP_ENABLE); + else + i40e_client_update_vsi_ctxt(&cdev->lan_info, client, + 0, 0, + I40E_CLIENT_VSI_FLAG_TCP_ENABLE, + I40E_CLIENT_VSI_FLAG_TCP_ENABLE); } /** @@ -717,13 +719,13 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev, return -ENOENT; } - if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) && - (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) { + if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) && + (flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) { ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; - } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) && - !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) { + } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) && + !(flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) { ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h index 15b21a5315b5..ba55c889e4c5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.h +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -132,7 +132,7 @@ struct i40e_info { #define I40E_CLIENT_RESET_LEVEL_PF 1 #define I40E_CLIENT_RESET_LEVEL_CORE 2 -#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE BIT(1) +#define I40E_CLIENT_VSI_FLAG_TCP_ENABLE BIT(1) struct i40e_ops { /* setup_q_vector_list enables queues with a particular vector */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 40c5f7628aa1..ee6052ecd215 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1486,6 +1486,7 @@ u32 i40e_led_get(struct i40e_hw *hw) case I40E_COMBINED_ACTIVITY: case I40E_FILTER_ACTIVITY: case I40E_MAC_ACTIVITY: + case I40E_LINK_ACTIVITY: continue; default: break; @@ -1534,6 +1535,7 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink) case I40E_COMBINED_ACTIVITY: case I40E_FILTER_ACTIVITY: case I40E_MAC_ACTIVITY: + case I40E_LINK_ACTIVITY: continue; default: break; @@ -1544,9 +1546,6 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink) gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK); - if (mode == I40E_LINK_ACTIVITY) - blink = false; - if (blink) gpio_val |= BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT); else @@ -3465,13 +3464,14 @@ exit: * @length: length of the section to be written (in bytes from the 
offset) * @data: command buffer (size [bytes] = length) * @last_command: tells if this is the last command in a series + * @preservation_flags: Preservation mode flags * @cmd_details: pointer to command details structure or NULL * * Update the NVM using the admin queue commands **/ i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer, u32 offset, u16 length, void *data, - bool last_command, + bool last_command, u8 preservation_flags, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; @@ -3490,6 +3490,16 @@ i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer, /* If this is the last command in a series, set the proper flag. */ if (last_command) cmd->command_flags |= I40E_AQ_NVM_LAST_CMD; + if (hw->mac.type == I40E_MAC_X722) { + if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_SELECTED) + cmd->command_flags |= + (I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED << + I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT); + else if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_ALL) + cmd->command_flags |= + (I40E_AQ_NVM_PRESERVATION_FLAGS_ALL << + I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT); + } cmd->module_pointer = module_pointer; cmd->offset = cpu_to_le32(offset); cmd->length = cpu_to_le16(length); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2ab22eba0c7c..a222d691958d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4877,104 +4877,6 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) #endif /** - * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue - * @q_idx: TX queue number - * @vsi: Pointer to VSI struct - * - * This function checks specified queue for given VSI. Detects hung condition. - * We proactively detect hung TX queues by checking if interrupts are disabled - * but there are pending descriptors. If it appears hung, attempt to recover - * by triggering a SW interrupt. - **/ -static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) -{ - struct i40e_ring *tx_ring = NULL; - struct i40e_pf *pf; - u32 val, tx_pending; - int i; - - pf = vsi->back; - - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { - if (q_idx == vsi->tx_rings[i]->queue_index) { - tx_ring = vsi->tx_rings[i]; - break; - } - } - } - - if (!tx_ring) - return; - - /* Read interrupt register */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) - val = rd32(&pf->hw, - I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + - tx_ring->vsi->base_vector - 1)); - else - val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); - - tx_pending = i40e_get_tx_pending(tx_ring); - - /* Interrupts are disabled and TX pending is non-zero, - * trigger the SW interrupt (don't wait). Worst case - * there will be one extra interrupt which may result - * into not cleaning any queues because queues are cleaned. - */ - if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) - i40e_force_wb(vsi, tx_ring->q_vector); -} - -/** - * i40e_detect_recover_hung - Function to detect and recover hung_queues - * @pf: pointer to PF struct - * - * LAN VSI has netdev and netdev has TX queues. This function is to check - * each of those TX queues if they are hung, trigger recovery by issuing - * SW interrupt. 
- **/ -static void i40e_detect_recover_hung(struct i40e_pf *pf) -{ - struct net_device *netdev; - struct i40e_vsi *vsi; - unsigned int i; - - /* Only for LAN VSI */ - vsi = pf->vsi[pf->lan_vsi]; - - if (!vsi) - return; - - /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */ - if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) - return; - - /* Make sure type is MAIN VSI */ - if (vsi->type != I40E_VSI_MAIN) - return; - - netdev = vsi->netdev; - if (!netdev) - return; - - /* Bail out if netif_carrier is not OK */ - if (!netif_carrier_ok(netdev)) - return; - - /* Go thru' TX queues for netdev */ - for (i = 0; i < netdev->num_tx_queues; i++) { - struct netdev_queue *q; - - q = netdev_get_tx_queue(netdev, i); - if (q) - i40e_detect_recover_hung_queue(i, vsi); - } -} - -/** * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP * @pf: pointer to PF * @@ -5342,6 +5244,8 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) { u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; int ret = 0; int i; @@ -5359,10 +5263,40 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); if (ret) { - dev_info(&vsi->back->pdev->dev, + struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0}; + + dev_info(&pf->pdev->dev, "Failed configuring TC map %d for VSI %d\n", enabled_tc, vsi->seid); - goto out; + ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, + &bw_config, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed querying vsi bw info, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + goto out; + } + if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) { + u8 valid_tc = bw_config.tc_valid_bits & enabled_tc; + + if (!valid_tc) + valid_tc = bw_config.tc_valid_bits; + /* Always enable TC0, no matter what */ + valid_tc |= 1; + dev_info(&pf->pdev->dev, + "Requested tc 0x%x, but FW reports 0x%x as valid. 
Attempting to use 0x%x.\n", + enabled_tc, bw_config.tc_valid_bits, valid_tc); + enabled_tc = valid_tc; + } + + ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); + if (ret) { + dev_err(&pf->pdev->dev, + "Unable to configure TC map %d for VSI %d\n", + enabled_tc, vsi->seid); + goto out; + } } /* Update Queue Pairs Mapping for currently enabled UPs */ @@ -5402,13 +5336,12 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) /* Update the VSI after updating the VSI queue-mapping * information */ - ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "Update vsi tc config failed, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); goto out; } /* update the local VSI info with updated queue map */ @@ -5418,11 +5351,10 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) /* Update current VSI BW information */ ret = i40e_vsi_get_bw_info(vsi); if (ret) { - dev_info(&vsi->back->pdev->dev, + dev_info(&pf->pdev->dev, "Failed updating vsi bw info, err %s aq_err %s\n", - i40e_stat_str(&vsi->back->hw, ret), - i40e_aq_str(&vsi->back->hw, - vsi->back->hw.aq.asq_last_status)); + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); goto out; } @@ -7505,6 +7437,8 @@ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, { struct i40e_vsi *vsi = np->vsi; + if (!tc_can_offload(vsi->netdev)) + return -EOPNOTSUPP; if (cls_flower->common.chain_index) return -EOPNOTSUPP; @@ -9075,6 +9009,17 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) vsi->uplink_seid); return ret; } + /* Reconfigure TX queues using QTX_CTL register */ + ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "failed to configure TX rings for channel %u\n", + ch->seid); + return ret; + } + /* update 'next_base_queue' */ + vsi->next_base_queue = vsi->next_base_queue + + ch->num_queue_pairs; if (ch->max_tx_rate) { u64 credits = ch->max_tx_rate; @@ -9695,7 +9640,7 @@ static void i40e_service_task(struct work_struct *work) if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state)) return; - i40e_detect_recover_hung(pf); + i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]); i40e_sync_filters_subtask(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); @@ -10462,10 +10407,9 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) /* set up vector assignment tracking */ size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors); pf->irq_pile = kzalloc(size, GFP_KERNEL); - if (!pf->irq_pile) { - dev_err(&pf->pdev->dev, "error allocating irq_pile memory\n"); + if (!pf->irq_pile) return -ENOMEM; - } + pf->irq_pile->num_entries = vectors; pf->irq_pile->search_hint = 0; @@ -10783,8 +10727,13 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) /* Determine the RSS size of the VSI */ if (!vsi->rss_size) { u16 qcount; - - qcount = vsi->num_queue_pairs / vsi->tc_config.numtc; + /* If the firmware does something weird during VSI init, we + * could end up with zero TCs. Check for that to avoid + * divide-by-zero. It probably won't pass traffic, but it also + * won't panic. + */ + qcount = vsi->num_queue_pairs / + (vsi->tc_config.numtc ? 
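
The i40e_vsi_config_tc() fallback above reconciles the requested TC map with what firmware reports as valid before retrying the BW allocation: intersect the two bitmaps, fall back to the firmware's full set if the intersection is empty, and always keep TC0 so the VSI stays usable. The same arithmetic, standalone:

        #include <stdint.h>
        #include <stdio.h>

        static uint8_t reconcile_tc(uint8_t enabled_tc, uint8_t fw_valid_bits)
        {
                uint8_t valid_tc = fw_valid_bits & enabled_tc;

                if (!valid_tc)
                        valid_tc = fw_valid_bits;
                valid_tc |= 1; /* always enable TC0, no matter what */
                return valid_tc;
        }

        int main(void)
        {
                /* requested TCs 1-3, firmware only validates TCs 0-1 */
                printf("0x%02x\n", reconcile_tc(0x0e, 0x03)); /* -> 0x03 */
                return 0;
        }
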
vsi->tc_config.numtc : 1); vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount); } if (!vsi->rss_size) @@ -10972,7 +10921,7 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf) ret = i40e_aq_update_nvm(&pf->hw, I40E_SR_NVM_CONTROL_WORD, 0x10, sizeof(nvm_word), - &nvm_word, true, NULL); + &nvm_word, true, 0, NULL); /* Save off last admin queue command status before releasing * the NVM */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 425713fb72e5..76a5cb04e4fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -239,8 +239,9 @@ read_nvm_exit: * * Writes a 16 bit words buffer to the Shadow RAM using the admin command. **/ -static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer, - u32 offset, u16 words, void *data, +static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, + u8 module_pointer, u32 offset, + u16 words, void *data, bool last_command) { i40e_status ret_code = I40E_ERR_NVM; @@ -496,7 +497,8 @@ static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer, ret_code = i40e_aq_update_nvm(hw, module_pointer, 2 * offset, /*bytes*/ 2 * words, /*bytes*/ - data, last_command, &cmd_details); + data, last_command, 0, + &cmd_details); return ret_code; } @@ -677,6 +679,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw, static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw, struct i40e_nvm_access *cmd, u8 *bytes, int *perrno); +static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno); static inline u8 i40e_nvmupd_get_module(u32 val) { return (u8)(val & I40E_NVM_MOD_PNT_MASK); @@ -686,6 +691,12 @@ static inline u8 i40e_nvmupd_get_transaction(u32 val) return (u8)((val & I40E_NVM_TRANS_MASK) >> I40E_NVM_TRANS_SHIFT); } +static inline u8 i40e_nvmupd_get_preservation_flags(u32 val) +{ + return (u8)((val & I40E_NVM_PRESERVATION_FLAGS_MASK) >> + I40E_NVM_PRESERVATION_FLAGS_SHIFT); +} + static const char * const i40e_nvm_update_state_str[] = { "I40E_NVMUPD_INVALID", "I40E_NVMUPD_READ_CON", @@ -703,6 +714,7 @@ static const char * const i40e_nvm_update_state_str[] = { "I40E_NVMUPD_STATUS", "I40E_NVMUPD_EXEC_AQ", "I40E_NVMUPD_GET_AQ_RESULT", + "I40E_NVMUPD_GET_AQ_EVENT", }; /** @@ -798,9 +810,9 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, * the wait info and return before doing anything else */ if (cmd->offset == 0xffff) { - i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode); + i40e_nvmupd_clear_wait_state(hw); status = 0; - goto exit; + break; } status = I40E_ERR_NOT_READY; @@ -815,7 +827,7 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, *perrno = -ESRCH; break; } -exit: + mutex_unlock(&hw->aq.arq_mutex); return status; } @@ -944,6 +956,10 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, status = i40e_nvmupd_get_aq_result(hw, cmd, bytes, perrno); break; + case I40E_NVMUPD_GET_AQ_EVENT: + status = i40e_nvmupd_get_aq_event(hw, cmd, bytes, perrno); + break; + default: i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: bad cmd %s in init state\n", @@ -1118,38 +1134,53 @@ retry: } /** - * i40e_nvmupd_check_wait_event - handle NVM update operation events + * i40e_nvmupd_clear_wait_state - clear wait state on hw * @hw: pointer to the hardware structure - * @opcode: the event that just happened **/ -void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) +void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw) { - if (opcode == 
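
The new NVM preservation plumbing travels in two different encodings: userspace packs the mode into bits 13:12 of the nvmupd "config" word (SELECTED = 0x01, ALL = 0x02, per the i40e_type.h defines), the driver extracts it with i40e_nvmupd_get_preservation_flags(), and on X722 re-encodes it into bits 2:1 of the AQ command_flags, where the values differ (SELECTED = 0x03, ALL = 0x01). The bit arithmetic, standalone:

        #include <stdint.h>
        #include <stdio.h>

        #define NVM_PRES_SHIFT 12
        #define NVM_PRES_MASK  (0x3u << NVM_PRES_SHIFT)
        #define AQ_PRES_SHIFT  1

        #define NVM_PRES_SELECTED 0x01 /* ioctl-side encoding */
        #define NVM_PRES_ALL      0x02
        #define AQ_PRES_SELECTED  0x03 /* AQ-side encoding differs */
        #define AQ_PRES_ALL       0x01

        static uint8_t aq_flags_for(uint32_t config)
        {
                uint8_t pres = (config & NVM_PRES_MASK) >> NVM_PRES_SHIFT;

                if (pres == NVM_PRES_SELECTED)
                        return AQ_PRES_SELECTED << AQ_PRES_SHIFT;
                if (pres == NVM_PRES_ALL)
                        return AQ_PRES_ALL << AQ_PRES_SHIFT;
                return 0;
        }

        int main(void)
        {
                printf("0x%02x\n", aq_flags_for(0x1u << NVM_PRES_SHIFT)); /* 0x06 */
                return 0;
        }
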
hw->nvm_wait_opcode) { - i40e_debug(hw, I40E_DEBUG_NVM, - "NVMUPD: clearing wait on opcode 0x%04x\n", opcode); - if (hw->nvm_release_on_done) { - i40e_release_nvm(hw); - hw->nvm_release_on_done = false; - } - hw->nvm_wait_opcode = 0; + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: clearing wait on opcode 0x%04x\n", + hw->nvm_wait_opcode); - if (hw->aq.arq_last_status) { - hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; - return; - } + if (hw->nvm_release_on_done) { + i40e_release_nvm(hw); + hw->nvm_release_on_done = false; + } + hw->nvm_wait_opcode = 0; - switch (hw->nvmupd_state) { - case I40E_NVMUPD_STATE_INIT_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; - break; + if (hw->aq.arq_last_status) { + hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; + return; + } - case I40E_NVMUPD_STATE_WRITE_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; - break; + switch (hw->nvmupd_state) { + case I40E_NVMUPD_STATE_INIT_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + break; - default: - break; - } + case I40E_NVMUPD_STATE_WRITE_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; + break; + + default: + break; + } +} + +/** + * i40e_nvmupd_check_wait_event - handle NVM update operation events + * @hw: pointer to the hardware structure + * @opcode: the event that just happened + **/ +void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, + struct i40e_aq_desc *desc) +{ + u32 aq_desc_len = sizeof(struct i40e_aq_desc); + + if (opcode == hw->nvm_wait_opcode) { + memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len); + i40e_nvmupd_clear_wait_state(hw); } } @@ -1205,6 +1236,9 @@ static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw, else if (module == 0) upd_cmd = I40E_NVMUPD_GET_AQ_RESULT; break; + case I40E_NVM_AQE: + upd_cmd = I40E_NVMUPD_GET_AQ_EVENT; + break; } break; @@ -1267,6 +1301,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw, u32 aq_data_len; i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + if (cmd->offset == 0xffff) + return 0; + memset(&cmd_details, 0, sizeof(cmd_details)); cmd_details.wb_desc = &hw->nvm_wb_desc; @@ -1302,6 +1339,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw, } } + if (cmd->offset) + memset(&hw->nvm_aq_event_desc, 0, aq_desc_len); + /* and away we go! */ status = i40e_asq_send_command(hw, aq_desc, buff, buff_size, &cmd_details); @@ -1311,6 +1351,7 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); *perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status); + return status; } /* should we wait for a followup event? 
*/ @@ -1392,6 +1433,40 @@ static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw, } /** + * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq + * @hw: pointer to hardware structure + * @cmd: pointer to nvm update command buffer + * @bytes: pointer to the data buffer + * @perrno: pointer to return error code + * + * cmd structure contains identifiers and data buffer + **/ +static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw, + struct i40e_nvm_access *cmd, + u8 *bytes, int *perrno) +{ + u32 aq_total_len; + u32 aq_desc_len; + + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__); + + aq_desc_len = sizeof(struct i40e_aq_desc); + aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen); + + /* check copylength range */ + if (cmd->data_size > aq_total_len) { + i40e_debug(hw, I40E_DEBUG_NVM, + "%s: copy length %d too big, trimming to %d\n", + __func__, cmd->data_size, aq_total_len); + cmd->data_size = aq_total_len; + } + + memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size); + + return 0; +} + +/** * i40e_nvmupd_nvm_read - Read NVM * @hw: pointer to hardware structure * @cmd: pointer to nvm update command buffer @@ -1486,18 +1561,20 @@ static i40e_status i40e_nvmupd_nvm_write(struct i40e_hw *hw, i40e_status status = 0; struct i40e_asq_cmd_details cmd_details; u8 module, transaction; + u8 preservation_flags; bool last; transaction = i40e_nvmupd_get_transaction(cmd->config); module = i40e_nvmupd_get_module(cmd->config); last = (transaction & I40E_NVM_LCB); + preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config); memset(&cmd_details, 0, sizeof(cmd_details)); cmd_details.wb_desc = &hw->nvm_wb_desc; status = i40e_aq_update_nvm(hw, module, cmd->offset, (u16)cmd->data_size, bytes, last, - &cmd_details); + preservation_flags, &cmd_details); if (status) { i40e_debug(hw, I40E_DEBUG_NVM, "i40e_nvmupd_nvm_write mod 0x%x off 0x%x len 0x%x\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index b3cc89cc3a86..187dd53e0056 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -214,7 +214,7 @@ i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer, u32 offset, u16 length, void *data, - bool last_command, + bool last_command, u8 preservation_flags, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type, u8 mib_type, void *buff, u16 buff_size, @@ -333,7 +333,9 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw, i40e_status i40e_nvmupd_command(struct i40e_hw *hw, struct i40e_nvm_access *cmd, u8 *bytes, int *); -void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode); +void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode, + struct i40e_aq_desc *desc); +void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw); void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status); extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[]; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 40edb6e5e6f6..8d2275830a40 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -726,6 +726,59 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring) return 0; } +/** + * i40e_detect_recover_hung - Function to detect 
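
i40e_nvmupd_get_aq_event() above clamps the user's copy length to what the saved descriptor actually holds (the fixed header plus the indirect datalen) rather than overrunning the source. The same trim pattern in miniature:

        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        static uint32_t copy_event(uint8_t *dst, uint32_t want,
                                   const uint8_t *desc, uint32_t desc_len,
                                   uint32_t datalen)
        {
                uint32_t total = desc_len + datalen;

                if (want > total)
                        want = total; /* trim instead of overrunning */
                memcpy(dst, desc, want);
                return want;
        }

        int main(void)
        {
                uint8_t desc[64] = { 0 }, out[128];

                printf("%u\n", copy_event(out, sizeof(out), desc, 32, 16));
                return 0;
        }
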
and recover hung_queues + * @vsi: pointer to vsi struct with tx queues + * + * VSI has netdev and netdev has TX queues. This function is to check each of + * those TX queues if they are hung, trigger recovery by issuing SW interrupt. + **/ +void i40e_detect_recover_hung(struct i40e_vsi *vsi) +{ + struct i40e_ring *tx_ring = NULL; + struct net_device *netdev; + unsigned int i; + int packets; + + if (!vsi) + return; + + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + netdev = vsi->netdev; + if (!netdev) + return; + + if (!netif_carrier_ok(netdev)) + return; + + for (i = 0; i < vsi->num_queue_pairs; i++) { + tx_ring = vsi->tx_rings[i]; + if (tx_ring && tx_ring->desc) { + /* If packet counter has not changed the queue is + * likely stalled, so force an interrupt for this + * queue. + * + * prev_pkt_ctr would be negative if there was no + * pending work. + */ + packets = tx_ring->stats.packets & INT_MAX; + if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + i40e_force_wb(vsi, tx_ring->q_vector); + continue; + } + + /* Memory barrier between read of packet count and call + * to i40e_get_tx_pending() + */ + smp_rmb(); + tx_ring->tx_stats.prev_pkt_ctr = + i40e_get_tx_pending(tx_ring) ? packets : -1; + } + } +} + #define WB_STRIDE 4 /** @@ -1163,6 +1216,7 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + tx_ring->tx_stats.prev_pkt_ctr = -1; return 0; err: diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 2d08760fc4ce..d4799b41e98a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -333,6 +333,7 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + int prev_pkt_ctr; }; struct i40e_rx_queue_stats { @@ -501,6 +502,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring); int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); u32 i40e_get_tx_pending(struct i40e_ring *ring); +void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 5a708c363d99..cd294e6a8587 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -402,6 +402,7 @@ enum i40e_nvmupd_cmd { I40E_NVMUPD_STATUS, I40E_NVMUPD_EXEC_AQ, I40E_NVMUPD_GET_AQ_RESULT, + I40E_NVMUPD_GET_AQ_EVENT, }; enum i40e_nvmupd_state { @@ -421,15 +422,21 @@ enum i40e_nvmupd_state { #define I40E_NVM_MOD_PNT_MASK 0xFF -#define I40E_NVM_TRANS_SHIFT 8 -#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT) -#define I40E_NVM_CON 0x0 -#define I40E_NVM_SNT 0x1 -#define I40E_NVM_LCB 0x2 -#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB) -#define I40E_NVM_ERA 0x4 -#define I40E_NVM_CSUM 0x8 -#define I40E_NVM_EXEC 0xf +#define I40E_NVM_TRANS_SHIFT 8 +#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT) +#define I40E_NVM_PRESERVATION_FLAGS_SHIFT 12 +#define I40E_NVM_PRESERVATION_FLAGS_MASK \ + (0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT) +#define I40E_NVM_PRESERVATION_FLAGS_SELECTED 0x01 +#define I40E_NVM_PRESERVATION_FLAGS_ALL 0x02 +#define I40E_NVM_CON 0x0 +#define I40E_NVM_SNT 0x1 +#define I40E_NVM_LCB 0x2 +#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB) +#define I40E_NVM_ERA 0x4 +#define I40E_NVM_CSUM 
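
The rewritten hung-queue detector above replaces register reads with a cheap per-ring marker: each service tick stores the completion count if work is still pending, or -1 if the ring is idle; if the next tick sees the same count, the queue has stalled and a software interrupt is forced. A userspace model of the prev_pkt_ctr heuristic:

        #include <limits.h>
        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        struct ring {
                uint64_t packets; /* completions so far */
                int prev_pkt_ctr; /* snapshot from last tick, or -1 */
        };

        static bool tick(struct ring *r, bool pending)
        {
                int packets = (int)(r->packets & INT_MAX);

                if (r->prev_pkt_ctr == packets)
                        return true; /* stalled: force an interrupt */

                /* an idle ring parks the marker where it can never match */
                r->prev_pkt_ctr = pending ? packets : -1;
                return false;
        }

        int main(void)
        {
                struct ring r = { .packets = 5, .prev_pkt_ctr = -1 };

                printf("%d", tick(&r, true));   /* arms the marker: 0 */
                printf("%d\n", tick(&r, true)); /* no progress: 1 */
                return 0;
        }
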
+#define I40E_NVM_AQE 0xe +#define I40E_NVM_EXEC 0xf #define I40E_NVM_ADAPT_SHIFT 16 #define I40E_NVM_ADAPT_MASK (0xffff << I40E_NVM_ADAPT_SHIFT) @@ -611,6 +618,7 @@ struct i40e_hw { /* state of nvm update process */ enum i40e_nvmupd_state nvmupd_state; struct i40e_aq_desc nvm_wb_desc; + struct i40e_aq_desc nvm_aq_event_desc; struct i40e_virt_mem nvm_buff; bool nvm_release_on_done; u16 nvm_wait_opcode; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 435a112d09f5..b0e6454995b6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -2196,8 +2196,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); */ struct i40e_aqc_nvm_update { u8 command_flags; -#define I40E_AQ_NVM_LAST_CMD 0x01 -#define I40E_AQ_NVM_FLASH_ONLY 0x80 +#define I40E_AQ_NVM_LAST_CMD 0x01 +#define I40E_AQ_NVM_FLASH_ONLY 0x80 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT 1 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK 0x03 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED 0x03 +#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL 0x01 u8 module_pointer; __le16 length; __le32 offset; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 1ba29bb85b67..c7831f7f7761 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -148,6 +148,59 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw) return 0; } +/** + * i40evf_detect_recover_hung - Function to detect and recover hung queues + * @vsi: pointer to vsi struct with tx queues + * + * VSI has netdev and netdev has TX queues. This function checks each of + * those TX queues, and if a queue is hung it triggers recovery by issuing + * a SW interrupt. + **/ +void i40evf_detect_recover_hung(struct i40e_vsi *vsi) +{ + struct i40e_ring *tx_ring = NULL; + struct net_device *netdev; + unsigned int i; + int packets; + + if (!vsi) + return; + + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return; + + netdev = vsi->netdev; + if (!netdev) + return; + + if (!netif_carrier_ok(netdev)) + return; + + for (i = 0; i < vsi->back->num_active_queues; i++) { + tx_ring = &vsi->back->tx_rings[i]; + if (tx_ring && tx_ring->desc) { + /* If packet counter has not changed the queue is + * likely stalled, so force an interrupt for this + * queue. + * + * prev_pkt_ctr would be negative if there was no + * pending work. + */ + packets = tx_ring->stats.packets & INT_MAX; + if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + i40evf_force_wb(vsi, tx_ring->q_vector); + continue; + } + + /* Memory barrier between read of packet count and call + * to i40evf_get_tx_pending() + */ + smp_rmb(); + tx_ring->tx_stats.prev_pkt_ctr = + i40evf_get_tx_pending(tx_ring, false) ?
packets : -1; + } + } +} + #define WB_STRIDE 4 /** @@ -469,6 +522,7 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + tx_ring->tx_stats.prev_pkt_ctr = -1; return 0; err: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 8d26c85d12e1..e72f16b4555b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -313,6 +313,7 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + int prev_pkt_ctr; u64 tx_lost_interrupt; }; @@ -467,6 +468,7 @@ void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw); +void i40evf_detect_recover_hung(struct i40e_vsi *vsi); int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40evf_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 6afc31616e04..54951c84a481 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -361,6 +361,7 @@ enum i40e_nvmupd_cmd { I40E_NVMUPD_STATUS, I40E_NVMUPD_EXEC_AQ, I40E_NVMUPD_GET_AQ_RESULT, + I40E_NVMUPD_GET_AQ_EVENT, }; enum i40e_nvmupd_state { @@ -380,15 +381,21 @@ enum i40e_nvmupd_state { #define I40E_NVM_MOD_PNT_MASK 0xFF -#define I40E_NVM_TRANS_SHIFT 8 -#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT) -#define I40E_NVM_CON 0x0 -#define I40E_NVM_SNT 0x1 -#define I40E_NVM_LCB 0x2 -#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB) -#define I40E_NVM_ERA 0x4 -#define I40E_NVM_CSUM 0x8 -#define I40E_NVM_EXEC 0xf +#define I40E_NVM_TRANS_SHIFT 8 +#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT) +#define I40E_NVM_PRESERVATION_FLAGS_SHIFT 12 +#define I40E_NVM_PRESERVATION_FLAGS_MASK \ + (0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT) +#define I40E_NVM_PRESERVATION_FLAGS_SELECTED 0x01 +#define I40E_NVM_PRESERVATION_FLAGS_ALL 0x02 +#define I40E_NVM_CON 0x0 +#define I40E_NVM_SNT 0x1 +#define I40E_NVM_LCB 0x2 +#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB) +#define I40E_NVM_ERA 0x4 +#define I40E_NVM_CSUM 0x8 +#define I40E_NVM_AQE 0xe +#define I40E_NVM_EXEC 0xf #define I40E_NVM_ADAPT_SHIFT 16 #define I40E_NVM_ADAPT_MASK (0xffff << I40E_NVM_ADAPT_SHIFT) @@ -561,6 +568,7 @@ struct i40e_hw { /* state of nvm update process */ enum i40e_nvmupd_state nvmupd_state; struct i40e_aq_desc nvm_wb_desc; + struct i40e_aq_desc nvm_aq_event_desc; struct i40e_virt_mem nvm_buff; bool nvm_release_on_done; u16 nvm_wait_opcode; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 47040ab2e298..33c0ffcc8b13 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -187,6 +187,7 @@ enum i40evf_state_t { enum i40evf_critical_section_t { __I40EVF_IN_CRITICAL_TASK, /* cannot be interrupted */ __I40EVF_IN_CLIENT_TASK, + __I40EVF_IN_REMOVE_TASK, /* device being removed */ }; /* board specific private data structure */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index f92587aba3c7..8934f784e96f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ 
-1716,6 +1716,8 @@ static void i40evf_watchdog_task(struct work_struct *work) if (adapter->state == __I40EVF_RUNNING) i40evf_request_stats(adapter); watchdog_done: + if (adapter->state == __I40EVF_RUNNING) + i40evf_detect_recover_hung(&adapter->vsi); clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); restart_watchdog: if (adapter->state == __I40EVF_REMOVE) @@ -1803,6 +1805,12 @@ static void i40evf_reset_task(struct work_struct *work) int i = 0, err; bool running; + /* When the device is being removed it doesn't make sense to run the + * reset task, so just return in that case. + */ + if (test_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section)) + return; + while (test_and_set_bit(__I40EVF_IN_CLIENT_TASK, &adapter->crit_section)) usleep_range(500, 1000); @@ -3053,7 +3061,8 @@ static void i40evf_remove(struct pci_dev *pdev) struct i40evf_mac_filter *f, *ftmp; struct i40e_hw *hw = &adapter->hw; int err; - + /* Indicate we are being removed so that reset_task is not run */ + set_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section); cancel_delayed_work_sync(&adapter->init_task); cancel_work_sync(&adapter->reset_task); cancel_delayed_work_sync(&adapter->client_task); @@ -3088,8 +3097,6 @@ static void i40evf_remove(struct pci_dev *pdev) if (adapter->watchdog_timer.function) del_timer_sync(&adapter->watchdog_timer); - flush_scheduled_work(); - i40evf_free_rss(adapter); if (hw->aq.asq.count) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index feb95b62a077..50ce0d6c09ef 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -1001,23 +1001,34 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (v_opcode == VIRTCHNL_OP_EVENT) { struct virtchnl_pf_event *vpe = (struct virtchnl_pf_event *)msg; + bool link_up = vpe->event_data.link_event.link_status; switch (vpe->event) { case VIRTCHNL_EVENT_LINK_CHANGE: adapter->link_speed = vpe->event_data.link_event.link_speed; - if (adapter->link_up != - vpe->event_data.link_event.link_status) { - adapter->link_up = - vpe->event_data.link_event.link_status; - if (adapter->link_up) { - netif_tx_start_all_queues(netdev); - netif_carrier_on(netdev); - } else { - netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); - } - i40evf_print_link_message(adapter); + + /* we've already got the right link status, bail */ + if (adapter->link_up == link_up) + break; + + /* If we get a link up message and start queues before + * our queues are configured it will trigger a TX hang. + * In that case, just ignore the link status message; + * we'll get another one after we enable queues and are + * actually prepared to send traffic.
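+ * (A sketch of the assumed ordering, using names from this patch: the + * PF can report VIRTCHNL_EVENT_LINK_CHANGE as soon as the link is + * physically up, but we only honor a link-up event once the adapter + * has reached __I40EVF_RUNNING, i.e. once our queues have been + * configured and enabled over virtchnl and we can really transmit.)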
+ */ + if (link_up && adapter->state != __I40EVF_RUNNING) + break; + + adapter->link_up = link_up; + if (link_up) { + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); } + i40evf_print_link_message(adapter); break; case VIRTCHNL_EVENT_RESET_IMPENDING: dev_info(&adapter->pdev->dev, "PF reset warning received\n"); diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 92845692087a..1c6b8d9176a8 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -690,6 +690,7 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); +unsigned int igb_get_max_rss_queues(struct igb_adapter *); #ifdef CONFIG_IGB_HWMON void igb_sysfs_exit(struct igb_adapter *adapter); int igb_sysfs_init(struct igb_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index d06a8db514d4..606e6761758f 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3338,37 +3338,7 @@ static int igb_set_rxfh(struct net_device *netdev, const u32 *indir, static unsigned int igb_max_channels(struct igb_adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; - unsigned int max_combined = 0; - - switch (hw->mac.type) { - case e1000_i211: - max_combined = IGB_MAX_RX_QUEUES_I211; - break; - case e1000_82575: - case e1000_i210: - max_combined = IGB_MAX_RX_QUEUES_82575; - break; - case e1000_i350: - if (!!adapter->vfs_allocated_count) { - max_combined = 1; - break; - } - /* fall through */ - case e1000_82576: - if (!!adapter->vfs_allocated_count) { - max_combined = 2; - break; - } - /* fall through */ - case e1000_82580: - case e1000_i354: - default: - max_combined = IGB_MAX_RX_QUEUES; - break; - } - - return max_combined; + return igb_get_max_rss_queues(adapter); } static void igb_get_channels(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c208753ff5b7..b88fae785369 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1744,6 +1744,20 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue, * value = idleSlope * 61034 * ----------------- (E6) * 1000000 + * + * NOTE: For i210, given the above, we can see that idleslope + * is represented in 16.38431 kbps units by the value at + * the TQAVCC register (1Gbps / 61034), which reduces + * the granularity for idleslope increments. + * For instance, if you want to configure a 2576kbps + * idleslope, the value to be written on the register + * would have to be 157.23. If rounded down, you end + * up with less bandwidth available than originally + * required (~2572 kbps). If rounded up, you end up + * with a higher bandwidth (~2589 kbps). The approach + * we take below is to always round up the + * calculated value, so the resulting bandwidth might + * be slightly higher for some configurations.
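+ * As a worked check of that rounding, using the same + * hypothetical 2576 kbps case from this note: + * DIV_ROUND_UP(2576 * 61034, 1000000) + * = DIV_ROUND_UP(157223584, 1000000) = 158, + * and 158 * 16.38431 kbps ~= 2588.7 kbps, which is + * the ~2589 kbps figure quoted above.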
*/ value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000); @@ -3200,8 +3214,6 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) /* if allocation failed then we do not support SR-IOV */ if (!adapter->vf_data) { adapter->vfs_allocated_count = 0; - dev_err(&pdev->dev, - "Unable to allocate memory for VF Data Storage\n"); err = -ENOMEM; goto out; } @@ -3373,10 +3385,10 @@ static void igb_probe_vfs(struct igb_adapter *adapter) #endif /* CONFIG_PCI_IOV */ } -static void igb_init_queue_configuration(struct igb_adapter *adapter) +unsigned int igb_get_max_rss_queues(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 max_rss_queues; + unsigned int max_rss_queues; /* Determine the maximum number of RSS queues supported. */ switch (hw->mac.type) { @@ -3407,6 +3419,14 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter) break; } + return max_rss_queues; +} + +static void igb_init_queue_configuration(struct igb_adapter *adapter) +{ + u32 max_rss_queues; + + max_rss_queues = igb_get_max_rss_queues(adapter); adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); igb_set_flag_queue_pairs(adapter, max_rss_queues); @@ -3676,7 +3696,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) int igb_close(struct net_device *netdev) { - if (netif_device_present(netdev)) + if (netif_device_present(netdev) || netdev->dismantle) return __igb_close(netdev, false); return 0; } @@ -8718,7 +8738,8 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index) /* Indicate to hardware the Address is Valid. */ if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) { - rar_high |= E1000_RAH_AV; + if (is_valid_ether_addr(addr)) + rar_high |= E1000_RAH_AV; if (hw->mac.type == e1000_82575) rar_high |= E1000_RAH_POOL_1 * @@ -8756,17 +8777,36 @@ static int igb_set_vf_mac(struct igb_adapter *adapter, static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { struct igb_adapter *adapter = netdev_priv(netdev); - if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + + /* Setting the VF MAC to 0 reverts the IGB_VF_FLAG_PF_SET_MAC + * flag and allows to overwrite the MAC via VF netdev. This + * is necessary to allow libvirt a way to restore the original + * MAC after unbinding vfio-pci and reloading igbvf after shutting + * down a VM. 
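+ * For illustration only (interface name, VF number and MAC are + * made up), the corresponding iproute2 sequence would look like: + * ip link set eth0 vf 0 mac 52:54:00:12:34:56 # PF pins the MAC + * ip link set eth0 vf 0 mac 00:00:00:00:00:00 # return it to the VF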
+ */ + if (is_zero_ether_addr(mac)) { + adapter->vf_data[vf].flags &= ~IGB_VF_FLAG_PF_SET_MAC; + dev_info(&adapter->pdev->dev, + "remove administratively set MAC on VF %d\n", + vf); + } else if (is_valid_ether_addr(mac)) { + adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; + dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", + mac, vf); + dev_info(&adapter->pdev->dev, + "Reload the VF driver to make this change effective."); + /* Generate additional warning if PF is down */ + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF MAC address has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); + } + } else { return -EINVAL; - adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; - dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); - dev_info(&adapter->pdev->dev, - "Reload the VF driver to make this change effective."); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF MAC address has been set, but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before attempting to use the VF device.\n"); } return igb_set_vf_mac(adapter, vf, mac); } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 841c2a083349..0746b19ec6d3 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -643,6 +643,10 @@ static void igb_ptp_tx_work(struct work_struct *work) adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state); adapter->tx_hwtstamp_timeouts++; + /* Clear the tx valid bit in TSYNCTXCTL register to enable + * interrupt + */ + rd32(E1000_TXSTMPH); dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); return; } @@ -717,6 +721,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter) */ void igb_ptp_tx_hang(struct igb_adapter *adapter) { + struct e1000_hw *hw = &adapter->hw; bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + IGB_PTP_TX_TIMEOUT); @@ -736,6 +741,10 @@ void igb_ptp_tx_hang(struct igb_adapter *adapter) adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state); adapter->tx_hwtstamp_timeouts++; + /* Clear the tx valid bit in TSYNCTXCTL register to enable + * interrupt + */ + rd32(E1000_TXSTMPH); dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); } } diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 35e6fa643c7e..8319465eb38d 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -42,3 +42,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o +ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 49ab0c7a9cd5..c1e3a0039ea5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -52,6 +52,7 @@ #ifdef CONFIG_IXGBE_DCA #include <linux/dca.h> #endif +#include "ixgbe_ipsec.h" #include <net/xdp.h> #include <net/busy_poll.h> @@ -171,10 +172,11 @@ enum ixgbe_tx_flags { IXGBE_TX_FLAGS_CC = 0x08, IXGBE_TX_FLAGS_IPV4 = 0x10, IXGBE_TX_FLAGS_CSUM = 0x20, + IXGBE_TX_FLAGS_IPSEC = 0x40, /* software defined flags 
*/ - IXGBE_TX_FLAGS_SW_VLAN = 0x40, - IXGBE_TX_FLAGS_FCOE = 0x80, + IXGBE_TX_FLAGS_SW_VLAN = 0x80, + IXGBE_TX_FLAGS_FCOE = 0x100, }; /* VLAN info */ @@ -629,15 +631,18 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_EEE_CAPABLE BIT(14) #define IXGBE_FLAG2_EEE_ENABLED BIT(15) #define IXGBE_FLAG2_RX_LEGACY BIT(16) +#define IXGBE_FLAG2_IPSEC_ENABLED BIT(17) /* Tx fast path data */ int num_tx_queues; u16 tx_itr_setting; u16 tx_work_limit; + u64 tx_ipsec; /* Rx fast path data */ int num_rx_queues; u16 rx_itr_setting; + u64 rx_ipsec; /* Port number used to identify VXLAN traffic */ __be16 vxlan_port; @@ -781,6 +786,10 @@ struct ixgbe_adapter { #define IXGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ u32 *rss_key; + +#ifdef CONFIG_XFRM + struct ixgbe_ipsec *ipsec; +#endif /* CONFIG_XFRM */ }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) @@ -1011,4 +1020,24 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter); void ixgbe_store_reta(struct ixgbe_adapter *adapter); s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm); +#ifdef CONFIG_XFRM_OFFLOAD +void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter); +void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter); +void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter); +void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb); +int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first, + struct ixgbe_ipsec_tx_data *itd); +#else +static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { }; +static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { }; +static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { }; +static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { }; +static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, + struct ixgbe_ipsec_tx_data *itd) { return 0; }; +#endif /* CONFIG_XFRM_OFFLOAD */ #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f064099733b6..317351025fd7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -115,6 +115,8 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = { {"tx_hwtstamp_timeouts", IXGBE_STAT(tx_hwtstamp_timeouts)}, {"tx_hwtstamp_skipped", IXGBE_STAT(tx_hwtstamp_skipped)}, {"rx_hwtstamp_cleared", IXGBE_STAT(rx_hwtstamp_cleared)}, + {"tx_ipsec", IXGBE_STAT(tx_ipsec)}, + {"rx_ipsec", IXGBE_STAT(rx_ipsec)}, #ifdef IXGBE_FCOE {"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)}, {"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)}, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c new file mode 100644 index 000000000000..93eacddb6704 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -0,0 +1,941 @@ +/******************************************************************************* + * + * Intel 10 Gigabit PCI Express Linux driver + * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * Linux NICS <[email protected]> + * e1000-devel Mailing List <[email protected]> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#include "ixgbe.h" +#include <net/xfrm.h> +#include <crypto/aead.h> + +/** + * ixgbe_ipsec_set_tx_sa - set the Tx SA registers + * @hw: hw specific details + * @idx: register index to write + * @key: key byte array + * @salt: salt bytes + **/ +static void ixgbe_ipsec_set_tx_sa(struct ixgbe_hw *hw, u16 idx, + u32 key[], u32 salt) +{ + u32 reg; + int i; + + for (i = 0; i < 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i), cpu_to_be32(key[3 - i])); + IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, cpu_to_be32(salt)); + IXGBE_WRITE_FLUSH(hw); + + reg = IXGBE_READ_REG(hw, IXGBE_IPSTXIDX); + reg &= IXGBE_RXTXIDX_IPS_EN; + reg |= idx << IXGBE_RXTXIDX_IDX_SHIFT | IXGBE_RXTXIDX_WRITE; + IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, reg); + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_ipsec_set_rx_item - set an Rx table item + * @hw: hw specific details + * @idx: register index to write + * @tbl: table selector + * + * Trigger the device to store into a particular Rx table the + * data that has already been loaded into the input register + **/ +static void ixgbe_ipsec_set_rx_item(struct ixgbe_hw *hw, u16 idx, + enum ixgbe_ipsec_tbl_sel tbl) +{ + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_IPSRXIDX); + reg &= IXGBE_RXTXIDX_IPS_EN; + reg |= tbl << IXGBE_RXIDX_TBL_SHIFT | + idx << IXGBE_RXTXIDX_IDX_SHIFT | + IXGBE_RXTXIDX_WRITE; + IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, reg); + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_ipsec_set_rx_sa - set up the register bits to save SA info + * @hw: hw specific details + * @idx: register index to write + * @spi: security parameter index + * @key: key byte array + * @salt: salt bytes + * @mode: rx decrypt control bits + * @ip_idx: index into IP table for related IP address + **/ +static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi, + u32 key[], u32 salt, u32 mode, u32 ip_idx) +{ + int i; + + /* store the SPI (in bigendian) and IPidx */ + IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, cpu_to_le32(spi)); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, ip_idx); + IXGBE_WRITE_FLUSH(hw); + + ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_spi_tbl); + + /* store the key, salt, and mode */ + for (i = 0; i < 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i), cpu_to_be32(key[3 - i])); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, cpu_to_be32(salt)); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, mode); + IXGBE_WRITE_FLUSH(hw); + + ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_key_tbl); +} + +/** + * ixgbe_ipsec_set_rx_ip - set up the register bits to save SA IP addr info + * @hw: hw specific details + * @idx: register index to write + * @addr: IP address byte array + **/ +static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[]) +{ + int i; + + /* store the ip address */ + for (i = 0; i < 4; i++) + IXGBE_WRITE_REG(hw, 
IXGBE_IPSRXIPADDR(i), cpu_to_le32(addr[i])); + IXGBE_WRITE_FLUSH(hw); + + ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_ip_tbl); +} + +/** + * ixgbe_ipsec_clear_hw_tables - because some tables don't get cleared on reset + * @adapter: board private structure + **/ +static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ipsec *ipsec = adapter->ipsec; + struct ixgbe_hw *hw = &adapter->hw; + u32 buf[4] = {0, 0, 0, 0}; + u16 idx; + + /* disable Rx and Tx SA lookup */ + IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0); + IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0); + + /* scrub the tables - split the loops for the max of the IP table */ + for (idx = 0; idx < IXGBE_IPSEC_MAX_RX_IP_COUNT; idx++) { + ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0); + ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0); + ixgbe_ipsec_set_rx_ip(hw, idx, (__be32 *)buf); + } + for (; idx < IXGBE_IPSEC_MAX_SA_COUNT; idx++) { + ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0); + ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0); + } + + ipsec->num_rx_sa = 0; + ipsec->num_tx_sa = 0; +} + +/** + * ixgbe_ipsec_stop_data + * @adapter: board private structure + **/ +static void ixgbe_ipsec_stop_data(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + bool link = adapter->link_up; + u32 t_rdy, r_rdy; + u32 limit; + u32 reg; + + /* halt data paths */ + reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + reg |= IXGBE_SECTXCTRL_TX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + reg |= IXGBE_SECRXCTRL_RX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg); + + IXGBE_WRITE_FLUSH(hw); + + /* If the tx fifo doesn't have link, but still has data, + * we can't clear the tx sec block. Set the MAC loopback + * before block clear + */ + if (!link) { + reg = IXGBE_READ_REG(hw, IXGBE_MACC); + reg |= IXGBE_MACC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_MACC, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + reg |= IXGBE_HLREG0_LPBK; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg); + + IXGBE_WRITE_FLUSH(hw); + mdelay(3); + } + + /* wait for the paths to empty */ + limit = 20; + do { + mdelay(10); + t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) & + IXGBE_SECTXSTAT_SECTX_RDY; + r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) & + IXGBE_SECRXSTAT_SECRX_RDY; + } while (!t_rdy && !r_rdy && limit--); + + /* undo loopback if we played with it earlier */ + if (!link) { + reg = IXGBE_READ_REG(hw, IXGBE_MACC); + reg &= ~IXGBE_MACC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_MACC, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + reg &= ~IXGBE_HLREG0_LPBK; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg); + + IXGBE_WRITE_FLUSH(hw); + } +} + +/** + * ixgbe_ipsec_stop_engine + * @adapter: board private structure + **/ +static void ixgbe_ipsec_stop_engine(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 reg; + + ixgbe_ipsec_stop_data(adapter); + + /* disable Rx and Tx SA lookup */ + IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0); + + /* disable the Rx and Tx engines and full packet store-n-forward */ + reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + reg |= IXGBE_SECTXCTRL_SECTX_DIS; + reg &= ~IXGBE_SECTXCTRL_STORE_FORWARD; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + reg |= IXGBE_SECRXCTRL_SECRX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg); + + /* restore the "tx security buffer almost full threshold" to 0x250 */ + IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x250); + + /* Set minimum IFG between packets back to the default 0x1 */ + reg 
= IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG); + reg = (reg & 0xfffffff0) | 0x1; + IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg); + + /* final set for normal (no ipsec offload) processing */ + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_SECTX_DIS); + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, IXGBE_SECRXCTRL_SECRX_DIS); + + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_ipsec_start_engine + * @adapter: board private structure + * + * NOTE: this increases power consumption whether being used or not + **/ +static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 reg; + + ixgbe_ipsec_stop_data(adapter); + + /* Set minimum IFG between packets to 3 */ + reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG); + reg = (reg & 0xfffffff0) | 0x3; + IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg); + + /* Set "tx security buffer almost full threshold" to 0x15 so that the + * almost full indication is generated only after buffer contains at + * least an entire jumbo packet. + */ + reg = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF); + reg = (reg & 0xfffffc00) | 0x15; + IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, reg); + + /* restart the data paths by clearing the DISABLE bits */ + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0); + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_STORE_FORWARD); + + /* enable Rx and Tx SA lookup */ + IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, IXGBE_RXTXIDX_IPS_EN); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, IXGBE_RXTXIDX_IPS_EN); + + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset + * @adapter: board private structure + **/ +void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ipsec *ipsec = adapter->ipsec; + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED)) + return; + + /* clean up and restart the engine */ + ixgbe_ipsec_stop_engine(adapter); + ixgbe_ipsec_clear_hw_tables(adapter); + ixgbe_ipsec_start_engine(adapter); + + /* reload the IP addrs */ + for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) { + struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i]; + + if (ipsa->used) + ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr); + } + + /* reload the Rx and Tx keys */ + for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) { + struct rx_sa *rsa = &ipsec->rx_tbl[i]; + struct tx_sa *tsa = &ipsec->tx_tbl[i]; + + if (rsa->used) + ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi, + rsa->key, rsa->salt, + rsa->mode, rsa->iptbl_ind); + + if (tsa->used) + ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt); + } +} + +/** + * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index + * @ipsec: pointer to ipsec struct + * @rxtable: true if we need to look in the Rx table + * + * Returns the first unused index in either the Rx or Tx SA table + **/ +static int ixgbe_ipsec_find_empty_idx(struct ixgbe_ipsec *ipsec, bool rxtable) +{ + u32 i; + + if (rxtable) { + if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT) + return -ENOSPC; + + /* search rx sa table */ + for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) { + if (!ipsec->rx_tbl[i].used) + return i; + } + } else { + if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT) + return -ENOSPC; + + /* search tx sa table */ + for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) { + if (!ipsec->tx_tbl[i].used) + return i; + } + } + + return -ENOSPC; +} + +/** + * ixgbe_ipsec_find_rx_state - find the state that matches + * @ipsec: pointer to ipsec struct + * @daddr: inbound address to match + * @proto: protocol to match + * @spi: SPI to 
match + * @ip4: true if using an ipv4 address + * + * Returns a pointer to the matching SA state information + **/ +static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec, + __be32 *daddr, u8 proto, + __be32 spi, bool ip4) +{ + struct rx_sa *rsa; + struct xfrm_state *ret = NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist, spi) + if (spi == rsa->xs->id.spi && + ((ip4 && *daddr == rsa->xs->id.daddr.a4) || + (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6, + sizeof(rsa->xs->id.daddr.a6)))) && + proto == rsa->xs->id.proto) { + ret = rsa->xs; + xfrm_state_hold(ret); + break; + } + rcu_read_unlock(); + return ret; +} + +/** + * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol + * @xs: pointer to xfrm_state struct + * @mykey: pointer to key array to populate + * @mysalt: pointer to salt value to populate + * + * This copies the protocol keys and salt to our own data tables. The + * 82599 family only supports the one algorithm. + **/ +static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, + u32 *mykey, u32 *mysalt) +{ + struct net_device *dev = xs->xso.dev; + unsigned char *key_data; + char *alg_name = NULL; + const char aes_gcm_name[] = "rfc4106(gcm(aes))"; + int key_len; + + if (xs->aead) { + key_data = &xs->aead->alg_key[0]; + key_len = xs->aead->alg_key_len; + alg_name = xs->aead->alg_name; + } else { + netdev_err(dev, "Unsupported IPsec algorithm\n"); + return -EINVAL; + } + + if (strcmp(alg_name, aes_gcm_name)) { + netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n", + aes_gcm_name); + return -EINVAL; + } + + /* The key bytes come down in a bigendian array of bytes, so + * we don't need to do any byteswapping. + * 160 accounts for 16 byte key and 4 byte salt + */ + if (key_len == 160) { + *mysalt = ((u32 *)key_data)[4]; + } else if (key_len != 128) { + netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n"); + return -EINVAL; + } else { + netdev_info(dev, "IPsec hw offload parameters missing 32 bit salt value\n"); + *mysalt = 0; + } + memcpy(mykey, key_data, 16); + + return 0; +} + +/** + * ixgbe_ipsec_add_sa - program device with a security association + * @xs: pointer to transformer state struct + **/ +static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) +{ + struct net_device *dev = xs->xso.dev; + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_ipsec *ipsec = adapter->ipsec; + struct ixgbe_hw *hw = &adapter->hw; + int checked, match, first; + u16 sa_idx; + int ret; + int i; + + if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { + netdev_err(dev, "Unsupported protocol 0x%04x for ipsec offload\n", + xs->id.proto); + return -EINVAL; + } + + if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { + struct rx_sa rsa; + + if (xs->calg) { + netdev_err(dev, "Compression offload not supported\n"); + return -EINVAL; + } + + /* find the first unused index */ + ret = ixgbe_ipsec_find_empty_idx(ipsec, true); + if (ret < 0) { + netdev_err(dev, "No space for SA in Rx table!\n"); + return ret; + } + sa_idx = (u16)ret; + + memset(&rsa, 0, sizeof(rsa)); + rsa.used = true; + rsa.xs = xs; + + if (rsa.xs->id.proto & IPPROTO_ESP) + rsa.decrypt = xs->ealg || xs->aead; + + /* get the key and salt */ + ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt); + if (ret) { + netdev_err(dev, "Failed to get key data for Rx SA table\n"); + return ret; + } + + /* get ip for rx sa table */ + if (xs->props.family == AF_INET6) + memcpy(rsa.ipaddr, 
&xs->id.daddr.a6, 16); + else + memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4); + + /* The HW does not have a 1:1 mapping from keys to IP addrs, so + * check for a matching IP addr entry in the table. If the addr + * already exists, use it; else find an unused slot and add the + * addr. If one does not exist and there are no unused table + * entries, fail the request. + */ + + /* Find an existing match or first not used, and stop looking + * after we've checked all we know we have. + */ + checked = 0; + match = -1; + first = -1; + for (i = 0; + i < IXGBE_IPSEC_MAX_RX_IP_COUNT && + (checked < ipsec->num_rx_sa || first < 0); + i++) { + if (ipsec->ip_tbl[i].used) { + if (!memcmp(ipsec->ip_tbl[i].ipaddr, + rsa.ipaddr, sizeof(rsa.ipaddr))) { + match = i; + break; + } + checked++; + } else if (first < 0) { + first = i; /* track the first empty seen */ + } + } + + if (ipsec->num_rx_sa == 0) + first = 0; + + if (match >= 0) { + /* addrs are the same, we should use this one */ + rsa.iptbl_ind = match; + ipsec->ip_tbl[match].ref_cnt++; + + } else if (first >= 0) { + /* no matches, but here's an empty slot */ + rsa.iptbl_ind = first; + + memcpy(ipsec->ip_tbl[first].ipaddr, + rsa.ipaddr, sizeof(rsa.ipaddr)); + ipsec->ip_tbl[first].ref_cnt = 1; + ipsec->ip_tbl[first].used = true; + + ixgbe_ipsec_set_rx_ip(hw, rsa.iptbl_ind, rsa.ipaddr); + + } else { + /* no match and no empty slot */ + netdev_err(dev, "No space for SA in Rx IP SA table\n"); + memset(&rsa, 0, sizeof(rsa)); + return -ENOSPC; + } + + rsa.mode = IXGBE_RXMOD_VALID; + if (rsa.xs->id.proto & IPPROTO_ESP) + rsa.mode |= IXGBE_RXMOD_PROTO_ESP; + if (rsa.decrypt) + rsa.mode |= IXGBE_RXMOD_DECRYPT; + if (rsa.xs->props.family == AF_INET6) + rsa.mode |= IXGBE_RXMOD_IPV6; + + /* the preparations worked, so save the info */ + memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa)); + + ixgbe_ipsec_set_rx_sa(hw, sa_idx, rsa.xs->id.spi, rsa.key, + rsa.salt, rsa.mode, rsa.iptbl_ind); + xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX; + + ipsec->num_rx_sa++; + + /* hash the new entry for faster search in Rx path */ + hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist, + rsa.xs->id.spi); + } else { + struct tx_sa tsa; + + /* find the first unused index */ + ret = ixgbe_ipsec_find_empty_idx(ipsec, false); + if (ret < 0) { + netdev_err(dev, "No space for SA in Tx table\n"); + return ret; + } + sa_idx = (u16)ret; + + memset(&tsa, 0, sizeof(tsa)); + tsa.used = true; + tsa.xs = xs; + + if (xs->id.proto & IPPROTO_ESP) + tsa.encrypt = xs->ealg || xs->aead; + + ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt); + if (ret) { + netdev_err(dev, "Failed to get key data for Tx SA table\n"); + memset(&tsa, 0, sizeof(tsa)); + return ret; + } + + /* the preparations worked, so save the info */ + memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa)); + + ixgbe_ipsec_set_tx_sa(hw, sa_idx, tsa.key, tsa.salt); + + xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX; + + ipsec->num_tx_sa++; + } + + /* enable the engine if not already warmed up */ + if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED)) { + ixgbe_ipsec_start_engine(adapter); + adapter->flags2 |= IXGBE_FLAG2_IPSEC_ENABLED; + } + + return 0; +} + +/** + * ixgbe_ipsec_del_sa - clear out this specific SA + * @xs: pointer to transformer state struct + **/ +static void ixgbe_ipsec_del_sa(struct xfrm_state *xs) +{ + struct net_device *dev = xs->xso.dev; + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_ipsec *ipsec = adapter->ipsec; + struct ixgbe_hw *hw = &adapter->hw; + u32 
zerobuf[4] = {0, 0, 0, 0}; + u16 sa_idx; + + if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { + struct rx_sa *rsa; + u8 ipi; + + sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX; + rsa = &ipsec->rx_tbl[sa_idx]; + + if (!rsa->used) { + netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n", + sa_idx, xs->xso.offload_handle); + return; + } + + ixgbe_ipsec_set_rx_sa(hw, sa_idx, 0, zerobuf, 0, 0, 0); + hash_del_rcu(&rsa->hlist); + + /* if the IP table entry is referenced by only this SA, + * i.e. ref_cnt is only 1, clear the IP table entry as well + */ + ipi = rsa->iptbl_ind; + if (ipsec->ip_tbl[ipi].ref_cnt > 0) { + ipsec->ip_tbl[ipi].ref_cnt--; + + if (!ipsec->ip_tbl[ipi].ref_cnt) { + memset(&ipsec->ip_tbl[ipi], 0, + sizeof(struct rx_ip_sa)); + ixgbe_ipsec_set_rx_ip(hw, ipi, zerobuf); + } + } + + memset(rsa, 0, sizeof(struct rx_sa)); + ipsec->num_rx_sa--; + } else { + sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX; + + if (!ipsec->tx_tbl[sa_idx].used) { + netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n", + sa_idx, xs->xso.offload_handle); + return; + } + + ixgbe_ipsec_set_tx_sa(hw, sa_idx, zerobuf, 0); + memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa)); + ipsec->num_tx_sa--; + } + + /* if there are no SAs left, stop the engine to save energy */ + if (ipsec->num_rx_sa == 0 && ipsec->num_tx_sa == 0) { + adapter->flags2 &= ~IXGBE_FLAG2_IPSEC_ENABLED; + ixgbe_ipsec_stop_engine(adapter); + } +} + +/** + * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload + * @skb: current data packet + * @xs: pointer to transformer state struct + **/ +static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) +{ + if (xs->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl != 5) + return false; + } else { + /* Offload with IPv6 extension headers is not supported yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + + return true; +} + +/** + * ixgbe_ipsec_free - called by xfrm garbage collection + * @xs: pointer to transformer state struct + * + * We don't have any garbage to collect, so we shouldn't bother + * implementing this function, but the XFRM code doesn't check for + * existence before calling the API callback.
+ **/ +static void ixgbe_ipsec_free(struct xfrm_state *xs) +{ +} + +static const struct xfrmdev_ops ixgbe_xfrmdev_ops = { + .xdo_dev_state_add = ixgbe_ipsec_add_sa, + .xdo_dev_state_delete = ixgbe_ipsec_del_sa, + .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok, + .xdo_dev_state_free = ixgbe_ipsec_free, +}; + +/** + * ixgbe_ipsec_tx - setup Tx flags for ipsec offload + * @tx_ring: outgoing context + * @first: current data packet + * @itd: ipsec Tx data for later use in building context descriptor + **/ +int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, + struct ixgbe_ipsec_tx_data *itd) +{ + struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); + struct ixgbe_ipsec *ipsec = adapter->ipsec; + struct xfrm_state *xs; + struct tx_sa *tsa; + + if (unlikely(!first->skb->sp->len)) { + netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n", + __func__, first->skb->sp->len); + return 0; + } + + xs = xfrm_input_state(first->skb); + if (unlikely(!xs)) { + netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n", + __func__, xs); + return 0; + } + + itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX; + if (unlikely(itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT)) { + netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n", + __func__, itd->sa_idx, xs->xso.offload_handle); + return 0; + } + + tsa = &ipsec->tx_tbl[itd->sa_idx]; + if (unlikely(!tsa->used)) { + netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n", + __func__, itd->sa_idx); + return 0; + } + + first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC; + + itd->flags = 0; + if (xs->id.proto == IPPROTO_ESP) { + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP | + IXGBE_ADVTXD_TUCMD_L4T_TCP; + if (first->protocol == htons(ETH_P_IP)) + itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4; + itd->trailer_len = xs->props.trailer_len; + } + if (tsa->encrypt) + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN; + + return 1; +} + +/** + * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor + * @rx_ring: receiving ring + * @rx_desc: receive data descriptor + * @skb: current data packet + * + * Determine whether an ipsec encapsulation was noticed, and if so set up + * the resulting status for later in the receive stack. + **/ +void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev); + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + __le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH | + IXGBE_RXDADV_PKTTYPE_IPSEC_ESP); + struct ixgbe_ipsec *ipsec = adapter->ipsec; + struct xfrm_offload *xo = NULL; + struct xfrm_state *xs = NULL; + struct ipv6hdr *ip6 = NULL; + struct iphdr *ip4 = NULL; + void *daddr; + __be32 spi; + u8 *c_hdr; + u8 proto; + + /* Find the ip and crypto headers in the data. + * We can assume there is no vlan header in the way, because the + * hw won't recognize the IPsec packet, and in any case the + * vlan device doesn't currently support xfrm offload.
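+ * For reference, the assumed IPv4 case below is laid out as + * [ eth: ETH_HLEN ][ iphdr: ihl * 4 ][ AH/ESP: spi, ... ] + * so c_hdr lands on the first byte of the AH or ESP header, + * from which we can read the SPI.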
+ */ + if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) { + ip4 = (struct iphdr *)(skb->data + ETH_HLEN); + daddr = &ip4->daddr; + c_hdr = (u8 *)ip4 + ip4->ihl * 4; + } else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) { + ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN); + daddr = &ip6->daddr; + c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr); + } else { + return; + } + + switch (pkt_info & ipsec_pkt_types) { + case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH): + spi = ((struct ip_auth_hdr *)c_hdr)->spi; + proto = IPPROTO_AH; + break; + case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP): + spi = ((struct ip_esp_hdr *)c_hdr)->spi; + proto = IPPROTO_ESP; + break; + default: + return; + } + + xs = ixgbe_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4); + if (unlikely(!xs)) + return; + + skb->sp = secpath_dup(skb->sp); + if (unlikely(!skb->sp)) + return; + + skb->sp->xvec[skb->sp->len++] = xs; + skb->sp->olen++; + xo = xfrm_offload(skb); + xo->flags = CRYPTO_DONE; + xo->status = CRYPTO_SUCCESS; + + adapter->rx_ipsec++; +} + +/** + * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation + * @adapter: board private structure + **/ +void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ipsec *ipsec; + size_t size; + + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + return; + + ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); + if (!ipsec) + goto err1; + hash_init(ipsec->rx_sa_list); + + size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT; + ipsec->rx_tbl = kzalloc(size, GFP_KERNEL); + if (!ipsec->rx_tbl) + goto err2; + + size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT; + ipsec->tx_tbl = kzalloc(size, GFP_KERNEL); + if (!ipsec->tx_tbl) + goto err2; + + size = sizeof(struct rx_ip_sa) * IXGBE_IPSEC_MAX_RX_IP_COUNT; + ipsec->ip_tbl = kzalloc(size, GFP_KERNEL); + if (!ipsec->ip_tbl) + goto err2; + + ipsec->num_rx_sa = 0; + ipsec->num_tx_sa = 0; + + adapter->ipsec = ipsec; + ixgbe_ipsec_stop_engine(adapter); + ixgbe_ipsec_clear_hw_tables(adapter); + + adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops; + adapter->netdev->features |= NETIF_F_HW_ESP; + adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP; + + return; + +err2: + kfree(ipsec->ip_tbl); + kfree(ipsec->rx_tbl); + kfree(ipsec->tx_tbl); +err1: + kfree(adapter->ipsec); + netdev_err(adapter->netdev, "Unable to allocate memory for SA tables"); +} + +/** + * ixgbe_stop_ipsec_offload - tear down the ipsec offload + * @adapter: board private structure + **/ +void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ipsec *ipsec = adapter->ipsec; + + adapter->ipsec = NULL; + if (ipsec) { + kfree(ipsec->ip_tbl); + kfree(ipsec->rx_tbl); + kfree(ipsec->tx_tbl); + kfree(ipsec); + } +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h new file mode 100644 index 000000000000..da3ce7849e85 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h @@ -0,0 +1,93 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. 
+ + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS <[email protected]> + e1000-devel Mailing List <[email protected]> + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_IPSEC_H_ +#define _IXGBE_IPSEC_H_ + +#define IXGBE_IPSEC_MAX_SA_COUNT 1024 +#define IXGBE_IPSEC_MAX_RX_IP_COUNT 128 +#define IXGBE_IPSEC_BASE_RX_INDEX 0 +#define IXGBE_IPSEC_BASE_TX_INDEX IXGBE_IPSEC_MAX_SA_COUNT + +#define IXGBE_RXTXIDX_IPS_EN 0x00000001 +#define IXGBE_RXIDX_TBL_SHIFT 1 +enum ixgbe_ipsec_tbl_sel { + ips_rx_ip_tbl = 0x01, + ips_rx_spi_tbl = 0x02, + ips_rx_key_tbl = 0x03, +}; + +#define IXGBE_RXTXIDX_IDX_SHIFT 3 +#define IXGBE_RXTXIDX_READ 0x40000000 +#define IXGBE_RXTXIDX_WRITE 0x80000000 + +#define IXGBE_RXMOD_VALID 0x00000001 +#define IXGBE_RXMOD_PROTO_ESP 0x00000004 +#define IXGBE_RXMOD_DECRYPT 0x00000008 +#define IXGBE_RXMOD_IPV6 0x00000010 + +struct rx_sa { + struct hlist_node hlist; + struct xfrm_state *xs; + __be32 ipaddr[4]; + u32 key[4]; + u32 salt; + u32 mode; + u8 iptbl_ind; + bool used; + bool decrypt; +}; + +struct rx_ip_sa { + __be32 ipaddr[4]; + u32 ref_cnt; + bool used; +}; + +struct tx_sa { + struct xfrm_state *xs; + u32 key[4]; + u32 salt; + bool encrypt; + bool used; +}; + +struct ixgbe_ipsec_tx_data { + u32 flags; + u16 trailer_len; + u16 sa_idx; +}; + +struct ixgbe_ipsec { + u16 num_rx_sa; + u16 num_tx_sa; + struct rx_ip_sa *ip_tbl; + struct rx_sa *rx_tbl; + struct tx_sa *tx_tbl; + DECLARE_HASHTABLE(rx_sa_list, 10); +}; +#endif /* _IXGBE_IPSEC_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index b3c282d09b18..4242f0213e46 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1282,7 +1282,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter) } void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, - u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx) + u32 fceof_saidx, u32 type_tucmd, u32 mss_l4len_idx) { struct ixgbe_adv_tx_context_desc *context_desc; u16 i = tx_ring->next_to_use; @@ -1296,7 +1296,7 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); - context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); + context_desc->fceof_saidx = cpu_to_le32(fceof_saidx); context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4f28621b76e1..722cc3153a99 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1171,7 +1171,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_tx_buffer *tx_buffer; union 
ixgbe_adv_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; + unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0; unsigned int budget = q_vector->tx.work_limit; unsigned int i = tx_ring->next_to_clean; @@ -1202,6 +1202,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, /* update the statistics for this packet */ total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; + if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC) + total_ipsec++; /* free the skb */ if (ring_is_xdp(tx_ring)) @@ -1264,6 +1266,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, u64_stats_update_end(&tx_ring->syncp); q_vector->tx.total_bytes += total_bytes; q_vector->tx.total_packets += total_packets; + adapter->tx_ipsec += total_ipsec; if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { /* schedule immediate reset if we believe we hung */ @@ -1752,6 +1755,9 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } + if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) + ixgbe_ipsec_rx(rx_ring, rx_desc, skb); + skb->protocol = eth_type_trans(skb, dev); /* record Rx queue, or update MACVLAN statistics */ @@ -5425,6 +5431,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) ixgbe_set_rx_mode(adapter->netdev); ixgbe_restore_vlan(adapter); + ixgbe_ipsec_restore(adapter); switch (hw->mac.type) { case ixgbe_mac_82599EB: @@ -7795,10 +7802,12 @@ static inline bool ixgbe_ipv6_csum_is_sctp(struct sk_buff *skb) } static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, - struct ixgbe_tx_buffer *first) + struct ixgbe_tx_buffer *first, + struct ixgbe_ipsec_tx_data *itd) { struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; + u32 fceof_saidx = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { @@ -7839,7 +7848,12 @@ no_csum: vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, 0); + if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) { + fceof_saidx |= itd->sa_idx; + type_tucmd |= itd->flags | itd->trailer_len; + } + + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0); } #define IXGBE_SET_FLAG(_input, _flag, _result) \ @@ -7882,11 +7896,16 @@ static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, IXGBE_TX_FLAGS_CSUM, IXGBE_ADVTXD_POPTS_TXSM); - /* enble IPv4 checksum for TSO */ + /* enable IPv4 checksum for TSO */ olinfo_status |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_IPV4, IXGBE_ADVTXD_POPTS_IXSM); + /* enable IPsec */ + olinfo_status |= IXGBE_SET_FLAG(tx_flags, + IXGBE_TX_FLAGS_IPSEC, + IXGBE_ADVTXD_POPTS_IPSEC); + /* * Check Context must be set if Tx switch is enabled, which it * always is for case where virtual functions are running @@ -8350,6 +8369,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, u32 tx_flags = 0; unsigned short f; u16 count = TXD_USE_COUNT(skb_headlen(skb)); + struct ixgbe_ipsec_tx_data ipsec_tx = { 0 }; __be16 protocol = skb->protocol; u8 hdr_len = 0; @@ -8454,11 +8474,16 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, } #endif /* IXGBE_FCOE */ + +#ifdef CONFIG_XFRM_OFFLOAD + if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) + goto out_drop; +#endif tso = ixgbe_tso(tx_ring, first, &hdr_len); if (tso < 0) goto out_drop; else if (!tso) - ixgbe_tx_csum(tx_ring, first); + ixgbe_tx_csum(tx_ring, first, &ipsec_tx); /* add 
the ATR filter if ATR is on */ if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state)) @@ -9870,6 +9895,12 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) features &= ~NETIF_F_TSO; +#ifdef CONFIG_XFRM_OFFLOAD + /* IPsec offload doesn't get along well with others *yet* */ + if (skb->sp) + features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM); +#endif + return features; } @@ -10459,6 +10490,7 @@ skip_sriov: NETIF_F_FCOE_MTU; } #endif /* IXGBE_FCOE */ + ixgbe_init_ipsec_offload(adapter); if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) netdev->hw_features |= NETIF_F_LRO; @@ -10694,6 +10726,7 @@ static void ixgbe_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + ixgbe_stop_ipsec_offload(adapter); ixgbe_clear_interrupt_scheme(adapter); ixgbe_release_hw_control(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 21eb79ae3c30..ca45359686d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2360,11 +2360,6 @@ enum { #define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ #define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */ -#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000 -#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 -#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 -#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000 -#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 /* Multiple Transmit Queue Command Register */ #define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */ #define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */ @@ -2416,6 +2411,9 @@ enum { #define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */ #define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */ #define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */ +#define IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL 0x08000000 /* overlap ERR_PE */ +#define IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH 0x10000000 /* overlap ERR_OSE */ +#define IXGBE_RXDADV_ERR_IPSEC_AUTH_FAILED 0x18000000 #define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */ #define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */ #define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */ @@ -2437,6 +2435,7 @@ enum { #define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. 
FCP_RSP */ #define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */ #define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE 1588 Time Stamp */ +#define IXGBE_RXDADV_STAT_SECP 0x00020000 /* IPsec/MACsec pkt found */ /* PSRTYPE bit definitions */ #define IXGBE_PSRTYPE_TCPHDR 0x00000010 @@ -2503,13 +2502,6 @@ enum { #define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ #define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ -/* Security Processing bit Indication */ -#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000 -#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 -#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 -#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 -#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 - /* Masks to determine if packets should be dropped due to frame errors */ #define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \ IXGBE_RXD_ERR_CE | \ @@ -2523,6 +2515,8 @@ enum { IXGBE_RXDADV_ERR_LE | \ IXGBE_RXDADV_ERR_PE | \ IXGBE_RXDADV_ERR_OSE | \ + IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL | \ + IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH | \ IXGBE_RXDADV_ERR_USE) /* Multicast bit mask */ @@ -2901,7 +2895,7 @@ union ixgbe_adv_rx_desc { /* Context descriptors */ struct ixgbe_adv_tx_context_desc { __le32 vlan_macip_lens; - __le32 seqnum_seed; + __le32 fceof_saidx; __le32 type_tucmd_mlhl; __le32 mss_l4len_idx; }; @@ -2932,6 +2926,7 @@ struct ixgbe_adv_tx_context_desc { IXGBE_ADVTXD_POPTS_SHIFT) #define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \ IXGBE_ADVTXD_POPTS_SHIFT) +#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ #define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ #define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ #define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ @@ -2947,7 +2942,6 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ #define IXGBE_ADVTXD_TUCMD_L4T_RSV 0x00001800 /* RSV L4 Packet TYPE */ #define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /*Req requires Markers and CRC*/ -#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ #define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ #define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */ #define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */ diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index a19760736b71..a1d7b88cf083 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -10,6 +10,7 @@ * warranty of any kind, whether express or implied. 
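
The ixgbe IPsec pieces above fit together as follows: on Tx, ixgbe_ipsec_tx() fills a struct ixgbe_ipsec_tx_data whose sa_idx and trailer_len are folded by ixgbe_tx_csum() into the context descriptor's fceof_saidx and type_tucmd words; on Rx, IXGBE_RXDADV_STAT_SECP marks a security-processed packet, and the three IXGBE_RXDADV_ERR_IPSEC_* codes deliberately reuse the generic ERR_PE/ERR_OSE bit positions. A rough decode sketch in C (illustrative only; the real handling lives in ixgbe_ipsec_rx(), and the combined mask below is an assumption derived from AUTH_FAILED being the OR of the other two codes):

	/* hypothetical mask: both overlapped error bits together */
	#define SKETCH_RXDADV_ERR_IPSEC_MASK	0x18000000

	static void sketch_ipsec_rx_check(union ixgbe_adv_rx_desc *rx_desc)
	{
		u32 status = le32_to_cpu(rx_desc->wb.upper.status_error);

		if (!(status & IXGBE_RXDADV_STAT_SECP))
			return;	/* not an IPsec/MACsec-processed packet */

		switch (status & SKETCH_RXDADV_ERR_IPSEC_MASK) {
		case IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL:	/* overlaps ERR_PE */
		case IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH:		/* overlaps ERR_OSE */
		case IXGBE_RXDADV_ERR_IPSEC_AUTH_FAILED:
			return;	/* count and drop the frame */
		default:
			break;	/* good packet: look up the SA, set the secpath */
		}
	}
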
*/ +#include <linux/acpi.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -865,7 +866,7 @@ struct mvpp2 { /* List of pointers to port structures */ int port_count; - struct mvpp2_port **port_list; + struct mvpp2_port *port_list[MVPP2_MAX_PORTS]; /* Aggregated TXQs */ struct mvpp2_tx_queue *aggr_txqs; @@ -932,6 +933,9 @@ struct mvpp2_port { struct mvpp2 *priv; + /* Firmware node associated to the port */ + struct fwnode_handle *fwnode; + /* Per-port registers' base address */ void __iomem *base; void __iomem *stats_base; @@ -7499,7 +7503,10 @@ static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port, strncpy(irqname, "rx-shared", sizeof(irqname)); } - v->irq = of_irq_get_byname(port_node, irqname); + if (port_node) + v->irq = of_irq_get_byname(port_node, irqname); + else + v->irq = fwnode_irq_get(port->fwnode, i); if (v->irq <= 0) { ret = -EINVAL; goto err; @@ -7711,17 +7718,16 @@ static bool mvpp2_port_has_tx_irqs(struct mvpp2 *priv, } static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, - struct device_node *port_node, + struct fwnode_handle *fwnode, char **mac_from) { struct mvpp2_port *port = netdev_priv(dev); char hw_mac_addr[ETH_ALEN] = {0}; - const char *dt_mac_addr; + char fw_mac_addr[ETH_ALEN]; - dt_mac_addr = of_get_mac_address(port_node); - if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) { - *mac_from = "device tree"; - ether_addr_copy(dev->dev_addr, dt_mac_addr); + if (fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) { + *mac_from = "firmware node"; + ether_addr_copy(dev->dev_addr, fw_mac_addr); return; } @@ -7740,13 +7746,14 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, /* Ports initialization */ static int mvpp2_port_probe(struct platform_device *pdev, - struct device_node *port_node, - struct mvpp2 *priv, int index) + struct fwnode_handle *port_fwnode, + struct mvpp2 *priv) { struct device_node *phy_node; - struct phy *comphy; + struct phy *comphy = NULL; struct mvpp2_port *port; struct mvpp2_port_pcpu *port_pcpu; + struct device_node *port_node = to_of_node(port_fwnode); struct net_device *dev; struct resource *res; char *mac_from = ""; @@ -7757,7 +7764,12 @@ static int mvpp2_port_probe(struct platform_device *pdev, int phy_mode; int err, i, cpu; - has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node); + if (port_node) { + has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node); + } else { + has_tx_irqs = true; + queue_mode = MVPP2_QDIST_MULTI_MODE; + } if (!has_tx_irqs) queue_mode = MVPP2_QDIST_SINGLE_MODE; @@ -7772,24 +7784,30 @@ static int mvpp2_port_probe(struct platform_device *pdev, if (!dev) return -ENOMEM; - phy_node = of_parse_phandle(port_node, "phy", 0); - phy_mode = of_get_phy_mode(port_node); + if (port_node) + phy_node = of_parse_phandle(port_node, "phy", 0); + else + phy_node = NULL; + + phy_mode = fwnode_get_phy_mode(port_fwnode); if (phy_mode < 0) { dev_err(&pdev->dev, "incorrect phy mode\n"); err = phy_mode; goto err_free_netdev; } - comphy = devm_of_phy_get(&pdev->dev, port_node, NULL); - if (IS_ERR(comphy)) { - if (PTR_ERR(comphy) == -EPROBE_DEFER) { - err = -EPROBE_DEFER; - goto err_free_netdev; + if (port_node) { + comphy = devm_of_phy_get(&pdev->dev, port_node, NULL); + if (IS_ERR(comphy)) { + if (PTR_ERR(comphy) == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto err_free_netdev; + } + comphy = NULL; } - comphy = NULL; } - if (of_property_read_u32(port_node, "port-id", &id)) { + if (fwnode_property_read_u32(port_fwnode, 
"port-id", &id)) { err = -EINVAL; dev_err(&pdev->dev, "missing port-id value\n"); goto err_free_netdev; @@ -7802,6 +7820,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, port = netdev_priv(dev); port->dev = dev; + port->fwnode = port_fwnode; port->ntxqs = ntxqs; port->nrxqs = nrxqs; port->priv = priv; @@ -7811,7 +7830,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, if (err) goto err_free_netdev; - port->link_irq = of_irq_get_byname(port_node, "link"); + if (port_node) + port->link_irq = of_irq_get_byname(port_node, "link"); + else + port->link_irq = fwnode_irq_get(port_fwnode, port->nqvecs + 1); if (port->link_irq == -EPROBE_DEFER) { err = -EPROBE_DEFER; goto err_deinit_qvecs; @@ -7820,7 +7842,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, /* the link irq is optional */ port->link_irq = 0; - if (of_property_read_bool(port_node, "marvell,loopback")) + if (fwnode_property_read_bool(port_fwnode, "marvell,loopback")) port->flags |= MVPP2_F_LOOPBACK; port->id = id; @@ -7845,8 +7867,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, MVPP21_MIB_COUNTERS_OFFSET + port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ; } else { - if (of_property_read_u32(port_node, "gop-port-id", - &port->gop_id)) { + if (fwnode_property_read_u32(port_fwnode, "gop-port-id", + &port->gop_id)) { err = -EINVAL; dev_err(&pdev->dev, "missing gop-port-id value\n"); goto err_deinit_qvecs; @@ -7876,7 +7898,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, mutex_init(&port->gather_stats_lock); INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics); - mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from); + mvpp2_port_copy_mac_addr(dev, priv, port_fwnode, &mac_from); port->tx_ring_size = MVPP2_MAX_TXD_DFLT; port->rx_ring_size = MVPP2_MAX_RXD_DFLT; @@ -7934,7 +7956,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, } netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr); - priv->port_list[index] = port; + priv->port_list[priv->port_count++] = port; + return 0; err_free_port_pcpu: @@ -8193,8 +8216,9 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) static int mvpp2_probe(struct platform_device *pdev) { - struct device_node *dn = pdev->dev.of_node; - struct device_node *port_node; + const struct acpi_device_id *acpi_id; + struct fwnode_handle *fwnode = pdev->dev.fwnode; + struct fwnode_handle *port_fwnode; struct mvpp2 *priv; struct resource *res; void __iomem *base; @@ -8205,8 +8229,14 @@ static int mvpp2_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->hw_version = - (unsigned long)of_device_get_match_data(&pdev->dev); + if (has_acpi_companion(&pdev->dev)) { + acpi_id = acpi_match_device(pdev->dev.driver->acpi_match_table, + &pdev->dev); + priv->hw_version = (unsigned long)acpi_id->driver_data; + } else { + priv->hw_version = + (unsigned long)of_device_get_match_data(&pdev->dev); + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); base = devm_ioremap_resource(&pdev->dev, res); @@ -8220,10 +8250,23 @@ static int mvpp2_probe(struct platform_device *pdev) return PTR_ERR(priv->lms_base); } else { res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (has_acpi_companion(&pdev->dev)) { + /* In case the MDIO memory region is declared in + * the ACPI, it can already appear as 'in-use' + * in the OS. Because it is overlapped by second + * region of the network controller, make + * sure it is released, before requesting it again. 
+ * The care is taken by mvpp2 driver to avoid + * concurrent access to this memory region. + */ + release_resource(res); + } priv->iface_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->iface_base)) return PTR_ERR(priv->iface_base); + } + if (priv->hw_version == MVPP22 && dev_of_node(&pdev->dev)) { priv->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "marvell,system-controller"); @@ -8249,32 +8292,34 @@ static int mvpp2_probe(struct platform_device *pdev) else priv->max_port_rxqs = 32; - priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk"); - if (IS_ERR(priv->pp_clk)) - return PTR_ERR(priv->pp_clk); - err = clk_prepare_enable(priv->pp_clk); - if (err < 0) - return err; - - priv->gop_clk = devm_clk_get(&pdev->dev, "gop_clk"); - if (IS_ERR(priv->gop_clk)) { - err = PTR_ERR(priv->gop_clk); - goto err_pp_clk; - } - err = clk_prepare_enable(priv->gop_clk); - if (err < 0) - goto err_pp_clk; + if (dev_of_node(&pdev->dev)) { + priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk"); + if (IS_ERR(priv->pp_clk)) + return PTR_ERR(priv->pp_clk); + err = clk_prepare_enable(priv->pp_clk); + if (err < 0) + return err; - if (priv->hw_version == MVPP22) { - priv->mg_clk = devm_clk_get(&pdev->dev, "mg_clk"); - if (IS_ERR(priv->mg_clk)) { - err = PTR_ERR(priv->mg_clk); - goto err_gop_clk; + priv->gop_clk = devm_clk_get(&pdev->dev, "gop_clk"); + if (IS_ERR(priv->gop_clk)) { + err = PTR_ERR(priv->gop_clk); + goto err_pp_clk; } - - err = clk_prepare_enable(priv->mg_clk); + err = clk_prepare_enable(priv->gop_clk); if (err < 0) - goto err_gop_clk; + goto err_pp_clk; + + if (priv->hw_version == MVPP22) { + priv->mg_clk = devm_clk_get(&pdev->dev, "mg_clk"); + if (IS_ERR(priv->mg_clk)) { + err = PTR_ERR(priv->mg_clk); + goto err_gop_clk; + } + + err = clk_prepare_enable(priv->mg_clk); + if (err < 0) + goto err_gop_clk; + } priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk"); if (IS_ERR(priv->axi_clk)) { @@ -8287,10 +8332,14 @@ static int mvpp2_probe(struct platform_device *pdev) if (err < 0) goto err_gop_clk; } - } - /* Get system's tclk rate */ - priv->tclk = clk_get_rate(priv->pp_clk); + /* Get system's tclk rate */ + priv->tclk = clk_get_rate(priv->pp_clk); + } else if (device_property_read_u32(&pdev->dev, "clock-frequency", + &priv->tclk)) { + dev_err(&pdev->dev, "missing clock-frequency value\n"); + return -EINVAL; + } if (priv->hw_version == MVPP22) { err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(40)); @@ -8313,30 +8362,19 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_mg_clk; } - priv->port_count = of_get_available_child_count(dn); + /* Initialize ports */ + fwnode_for_each_available_child_node(fwnode, port_fwnode) { + err = mvpp2_port_probe(pdev, port_fwnode, priv); + if (err < 0) + goto err_port_probe; + } + if (priv->port_count == 0) { dev_err(&pdev->dev, "no ports enabled\n"); err = -ENODEV; goto err_mg_clk; } - priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count, - sizeof(*priv->port_list), - GFP_KERNEL); - if (!priv->port_list) { - err = -ENOMEM; - goto err_mg_clk; - } - - /* Initialize ports */ - i = 0; - for_each_available_child_of_node(dn, port_node) { - err = mvpp2_port_probe(pdev, port_node, priv, i); - if (err < 0) - goto err_port_probe; - i++; - } - /* Statistics must be gathered regularly because some of them (like * packets counters) are 32-bit registers and could overflow quite * quickly. 
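
All of the mvpp2 hunks in this region serve a single goal: replacing DT-only of_*() lookups with their fwnode_*() equivalents so one probe path serves both device tree and ACPI (note the acpi_match_device() branch for hw_version and the MRVL0110 ACPI ID further down). A minimal C sketch of the pattern, with a hypothetical helper name; the real driver keeps of_irq_get_byname()/devm_of_phy_get() fallbacks when an of_node exists and computes per-vector IRQ indices rather than the fixed index used here:

	#include <linux/of_irq.h>
	#include <linux/platform_device.h>
	#include <linux/property.h>

	static int sketch_probe_ports(struct platform_device *pdev)
	{
		struct fwnode_handle *fwnode = pdev->dev.fwnode;
		struct fwnode_handle *port_fwnode;
		int irq;
		u32 id;

		fwnode_for_each_available_child_node(fwnode, port_fwnode) {
			/* reads a DT property or an ACPI _DSD entry alike */
			if (fwnode_property_read_u32(port_fwnode, "port-id", &id))
				return -EINVAL;

			if (to_of_node(port_fwnode))	/* DT: IRQs by name */
				irq = of_irq_get_byname(to_of_node(port_fwnode),
							"link");
			else				/* ACPI: IRQs by index */
				irq = fwnode_irq_get(port_fwnode, 0);
			if (irq < 0)
				return irq;
		}
		return 0;
	}
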
For instance, a 10Gb link used at full bandwidth with the @@ -8357,7 +8395,7 @@ static int mvpp2_probe(struct platform_device *pdev) err_port_probe: i = 0; - for_each_available_child_of_node(dn, port_node) { + fwnode_for_each_available_child_node(fwnode, port_fwnode) { if (priv->port_list[i]) mvpp2_port_remove(priv->port_list[i]); i++; @@ -8376,14 +8414,14 @@ err_pp_clk: static int mvpp2_remove(struct platform_device *pdev) { struct mvpp2 *priv = platform_get_drvdata(pdev); - struct device_node *dn = pdev->dev.of_node; - struct device_node *port_node; + struct fwnode_handle *fwnode = pdev->dev.fwnode; + struct fwnode_handle *port_fwnode; int i = 0; flush_workqueue(priv->stats_queue); destroy_workqueue(priv->stats_queue); - for_each_available_child_of_node(dn, port_node) { + fwnode_for_each_available_child_node(fwnode, port_fwnode) { if (priv->port_list[i]) { mutex_destroy(&priv->port_list[i]->gather_stats_lock); mvpp2_port_remove(priv->port_list[i]); @@ -8406,6 +8444,9 @@ static int mvpp2_remove(struct platform_device *pdev) aggr_txq->descs_dma); } + if (is_acpi_node(port_fwnode)) + return 0; + clk_disable_unprepare(priv->axi_clk); clk_disable_unprepare(priv->mg_clk); clk_disable_unprepare(priv->pp_clk); @@ -8427,12 +8468,19 @@ static const struct of_device_id mvpp2_match[] = { }; MODULE_DEVICE_TABLE(of, mvpp2_match); +static const struct acpi_device_id mvpp2_acpi_match[] = { + { "MRVL0110", MVPP22 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, mvpp2_acpi_match); + static struct platform_driver mvpp2_driver = { .probe = mvpp2_probe, .remove = mvpp2_remove, .driver = { .name = MVPP2_DRIVER_NAME, .of_match_table = mvpp2_match, + .acpi_match_table = ACPI_PTR(mvpp2_acpi_match), }, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7b988595ac5f..4c9360b25532 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/transobj.h> +#include <linux/mlx5/fs.h> #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> @@ -560,6 +561,7 @@ struct mlx5e_channel { /* data path - accessed per napi poll */ struct irq_desc *irq_desc; + struct mlx5e_ch_stats stats; /* control */ struct mlx5e_priv *priv; @@ -696,6 +698,11 @@ enum { MLX5E_ARFS_FT_LEVEL }; +enum { + MLX5E_TC_FT_LEVEL = 0, + MLX5E_TC_TTC_FT_LEVEL, +}; + struct mlx5e_ethtool_table { struct mlx5_flow_table *ft; int num_rules; @@ -834,7 +841,7 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); -void mlx5e_update_stats(struct mlx5e_priv *priv, bool full); +void mlx5e_update_stats(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); @@ -1024,11 +1031,26 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv); -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +struct ttc_params { + struct mlx5_flow_table_attr ft_attr; + u32 any_tt_tirn; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_ttc_table *inner_ttc; +}; -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv); -void mlx5e_destroy_inner_ttc_table(struct 
mlx5e_priv *priv); +void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); +void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); +void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params); + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc); + +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc); int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, u32 underlay_qpn, u32 *tisn); @@ -1041,6 +1063,8 @@ int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); +int mlx5e_bits_invert(unsigned long a, int size); + /* ethtool helpers */ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d1395015ab5..cc8048f68f11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -207,7 +207,7 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, return; mutex_lock(&priv->state_lock); - mlx5e_update_stats(priv, true); + mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < mlx5e_num_stats_grps; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index def513484845..f64dda2bed31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -806,25 +806,25 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, return err ? ERR_PTR(err) : rule; } -static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) +static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, + struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { struct mlx5_flow_destination dest = {}; - struct mlx5e_ttc_table *ttc; struct mlx5_flow_handle **rules; struct mlx5_flow_table *ft; int tt; int err; - ttc = &priv->fs.ttc; ft = ttc->ft.t; rules = ttc->rules; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (tt = 0; tt < MLX5E_NUM_TT; tt++) { if (tt == MLX5E_TT_ANY) - dest.tir_num = priv->direct_tir[0].tirn; + dest.tir_num = params->any_tt_tirn; else - dest.tir_num = priv->indir_tir[tt].tirn; + dest.tir_num = params->indir_tirn[tt]; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, ttc_rules[tt].proto); @@ -832,12 +832,12 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) return 0; rules = ttc->tunnel_rules; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.inner_ttc.ft.t; + dest.ft = params->inner_ttc->ft.t; for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_tunnel_rules[tt].etype, @@ -977,25 +977,25 @@ mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, return err ? 
ERR_PTR(err) : rule; } -static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv, + struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rules; - struct mlx5e_ttc_table *ttc; struct mlx5_flow_table *ft; int err; int tt; - ttc = &priv->fs.inner_ttc; ft = ttc->ft.t; rules = ttc->rules; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (tt = 0; tt < MLX5E_NUM_TT; tt++) { if (tt == MLX5E_TT_ANY) - dest.tir_num = priv->direct_tir[0].tirn; + dest.tir_num = params->any_tt_tirn; else - dest.tir_num = priv->inner_indir_tir[tt].tirn; + dest.tir_num = params->indir_tirn[tt]; rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, @@ -1075,21 +1075,42 @@ err: return err; } -int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params) +{ + ttc_params->any_tt_tirn = priv->direct_tir[0].tirn; + ttc_params->inner_ttc = &priv->fs.inner_ttc; +} + +void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + + ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; +} + +void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) + +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + + ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; + ft_attr->level = MLX5E_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; +} + +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { - struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; - struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; int err; if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) return 0; - ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; - ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; - - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1100,7 +1121,7 @@ int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) if (err) goto err; - err = mlx5e_generate_inner_ttc_table_rules(priv); + err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc); if (err) goto err; @@ -1111,10 +1132,9 @@ err: return err; } -void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) { - struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) return; @@ -1122,27 +1142,21 @@ void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, + struct mlx5e_ttc_table *ttc) { - struct mlx5e_ttc_table *ttc = &priv->fs.ttc; - mlx5e_cleanup_ttc_rules(ttc); mlx5e_destroy_flow_table(&ttc->ft); }
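
With the signatures above, TTC table creation no longer hardwires priv->fs.ttc and the priv->indir_tir[] TIR numbers: the caller fills a struct ttc_params and supplies the table storage, which is what later lets the tc hairpin code build its own TTC instance with the same helpers. A C sketch of the resulting caller pattern, mirroring mlx5e_create_flow_steering() further down (the wrapper name is hypothetical):

	static int sketch_setup_outer_ttc(struct mlx5e_priv *priv)
	{
		struct ttc_params ttc_params = {};
		int tt;

		mlx5e_set_ttc_basic_params(priv, &ttc_params);	/* any_tt_tirn, inner_ttc */
		mlx5e_set_ttc_ft_params(&ttc_params);		/* size, level, prio */
		for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
			ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;

		return mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
	}
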
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, + struct mlx5e_ttc_table *ttc) { bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); struct mlx5e_flow_table *ft = &ttc->ft; int err; - ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; - ft_attr.level = MLX5E_TTC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; - - ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1153,7 +1167,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) if (err) goto err; - err = mlx5e_generate_ttc_table_rules(priv); + err = mlx5e_generate_ttc_table_rules(priv, params, ttc); if (err) goto err; @@ -1474,7 +1488,8 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) int mlx5e_create_flow_steering(struct mlx5e_priv *priv) { - int err; + struct ttc_params ttc_params = {}; + int tt, err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -1489,14 +1504,23 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_inner_ttc_table(priv); + mlx5e_set_ttc_basic_params(priv, &ttc_params); + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); if (err) { netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - err = mlx5e_create_ttc_table(priv); + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -1524,9 +1548,9 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1537,8 +1561,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); - mlx5e_destroy_ttc_table(priv); - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 466a4e1244d7..8530c770c873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -173,182 +173,23 @@ unlock: rtnl_unlock(); } -static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) +void mlx5e_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats temp, *s = &temp; - struct mlx5e_rq_stats *rq_stats; - struct mlx5e_sq_stats *sq_stats; - int i, j; - - memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_channel *c = priv->channels.c[i]; - - rq_stats = &c->rq.stats; - - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; - s->rx_lro_packets += rq_stats->lro_packets; - s->rx_lro_bytes += rq_stats->lro_bytes; - s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; - s->rx_csum_none += rq_stats->csum_none; -
s->rx_csum_complete += rq_stats->csum_complete; - s->rx_csum_unnecessary += rq_stats->csum_unnecessary; - s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; - s->rx_xdp_drop += rq_stats->xdp_drop; - s->rx_xdp_tx += rq_stats->xdp_tx; - s->rx_xdp_tx_full += rq_stats->xdp_tx_full; - s->rx_wqe_err += rq_stats->wqe_err; - s->rx_mpwqe_filler += rq_stats->mpwqe_filler; - s->rx_buff_alloc_err += rq_stats->buff_alloc_err; - s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; - s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; - s->rx_page_reuse += rq_stats->page_reuse; - s->rx_cache_reuse += rq_stats->cache_reuse; - s->rx_cache_full += rq_stats->cache_full; - s->rx_cache_empty += rq_stats->cache_empty; - s->rx_cache_busy += rq_stats->cache_busy; - s->rx_cache_waive += rq_stats->cache_waive; - - for (j = 0; j < priv->channels.params.num_tc; j++) { - sq_stats = &c->sq[j].stats; - - s->tx_packets += sq_stats->packets; - s->tx_bytes += sq_stats->bytes; - s->tx_tso_packets += sq_stats->tso_packets; - s->tx_tso_bytes += sq_stats->tso_bytes; - s->tx_tso_inner_packets += sq_stats->tso_inner_packets; - s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; - s->tx_added_vlan_packets += sq_stats->added_vlan_packets; - s->tx_queue_stopped += sq_stats->stopped; - s->tx_queue_wake += sq_stats->wake; - s->tx_queue_dropped += sq_stats->dropped; - s->tx_xmit_more += sq_stats->xmit_more; - s->tx_csum_partial_inner += sq_stats->csum_partial_inner; - s->tx_csum_none += sq_stats->csum_none; - s->tx_csum_partial += sq_stats->csum_partial; - } - } - - s->link_down_events_phy = MLX5_GET(ppcnt_reg, - priv->stats.pport.phy_counters, - counter_set.phys_layer_cntrs.link_down_events); - memcpy(&priv->stats.sw, s, sizeof(*s)); -} - -static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) -{ - int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u32 *out = (u32 *)priv->stats.vport.query_vport_out; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; - struct mlx5_core_dev *mdev = priv->mdev; - - MLX5_SET(query_vport_counter_in, in, opcode, - MLX5_CMD_OP_QUERY_VPORT_COUNTER); - MLX5_SET(query_vport_counter_in, in, op_mod, 0); - MLX5_SET(query_vport_counter_in, in, other_vport, 0); - - mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); -} - -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full) -{ - struct mlx5e_pport_stats *pstats = &priv->stats.pport; - struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int prio; - void *out; - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - out = pstats->IEEE_802_3_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - if (!full) - return; - - out = pstats->RFC_2863_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - out = pstats->RFC_2819_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - out = pstats->phy_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - - if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) { - out = pstats->phy_statistical_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 
0); - } - - if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) { - out = pstats->eth_ext_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - } - - MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { - out = pstats->per_prio_counters[prio]; - MLX5_SET(ppcnt_reg, in, prio_tc, prio); - mlx5_core_access_reg(mdev, in, sz, out, sz, - MLX5_REG_PPCNT, 0, 0); - } -} - -static void mlx5e_update_q_counter(struct mlx5e_priv *priv) -{ - struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; - u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; - int err; - - if (!priv->q_counter) - return; - - err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); - if (err) - return; - - qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); -} - -static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; - struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; - int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); - void *out; - - if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) - return; - - out = pcie_stats->pcie_perf_counters; - MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); -} + int i; -void mlx5e_update_stats(struct mlx5e_priv *priv, bool full) -{ - if (full) { - mlx5e_update_pcie_counters(priv); - mlx5e_ipsec_update_stats(priv); - } - mlx5e_update_pport_counters(priv, full); - mlx5e_update_vport_counters(priv); - mlx5e_update_q_counter(priv); - mlx5e_update_sw_counters(priv); + for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) + if (mlx5e_stats_grps[i].update_stats) + mlx5e_stats_grps[i].update_stats(priv); } static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { - mlx5e_update_stats(priv, false); + int i; + + for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) + if (mlx5e_stats_grps[i].update_stats_mask & + MLX5E_NDO_UPDATE_STATS) + mlx5e_stats_grps[i].update_stats(priv); } void mlx5e_update_stats_work(struct work_struct *work) @@ -2219,7 +2060,7 @@ static int mlx5e_rx_hash_fn(int hfunc) MLX5_RX_HASH_FN_INVERTED_XOR8; } -static int mlx5e_bits_invert(unsigned long a, int size) +int mlx5e_bits_invert(unsigned long a, int size) { int inv = 0; int i; @@ -3757,26 +3598,62 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, + struct mlx5e_txqsq *sq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int irqn_not_used, eqn; + struct mlx5_eq *eq; + u32 eqe_count; + + if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, &irqn_not_used)) + return false; + + eq = mlx5_eqn2eq(mdev, eqn); + if (IS_ERR(eq)) + return false; + + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eqn, eq->cons_index, eq->irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return false; + + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); + sq->channel->stats.eq_rearm++; + return true; +} + static void mlx5e_tx_timeout(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - bool sched_work = false; + bool reopen_channels = false; int i; netdev_err(dev, "TX timeout detected\n"); for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { + 
struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; - if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) + if (!netif_xmit_stopped(dev_queue)) continue; - sched_work = true; - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); + netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - dev_queue->trans_start)); + + /* If we recover a lost interrupt, most likely TX timeout will + * be resolved, skip reopening channels + */ + if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + reopen_channels = true; + } } - if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state)) schedule_work(&priv->tx_timeout_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ff234dfefc27..0d4bb0688faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -631,7 +631,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (is_last_ethertype_ip(skb, &network_depth)) { + if (likely(is_last_ethertype_ip(skb, &network_depth))) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (network_depth > ETH_HLEN) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index b74ddc7984bc..5f0f3493d747 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -71,6 +71,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -99,6 +100,72 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats temp, *s = &temp; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + struct mlx5e_ch_stats *ch_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = &c->rq.stats; + ch_stats = &c->stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; + s->rx_csum_none += rq_stats->csum_none; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; + s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + 
s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_page_reuse += rq_stats->page_reuse; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; + s->ch_eq_rearm += ch_stats->eq_rearm; + + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + s->tx_xmit_more += sq_stats->xmit_more; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; + } + } + + s->link_down_events_phy = MLX5_GET(ppcnt_reg, + priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); + memcpy(&priv->stats.sw, s, sizeof(*s)); +} + static const struct counter_desc q_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, }; @@ -128,6 +195,22 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; + int err; + + if (!priv->q_counter) + return; + + err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); + if (err) + return; + + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); +} + #define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) static const struct counter_desc vport_stats_desc[] = { { "rx_vport_unicast_packets", @@ -200,6 +283,19 @@ static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; + struct mlx5_core_dev *mdev = priv->mdev; + + MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + #define PPORT_802_3_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) @@ -252,6 +348,20 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_2863_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ 
counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -289,6 +399,20 @@ static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_2819_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) @@ -336,6 +460,20 @@ static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -376,6 +514,27 @@ static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } +static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -418,6 +577,23 @@ static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + #define PCIE_PERF_OFF(c) \ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) static const struct counter_desc pcie_perf_stats_desc[] = { @@ -505,6 +681,22 @@ static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + u32 
in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); +} + #define PPORT_PER_PRIO_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -656,6 +848,47 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, return idx; } +static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_grp_per_prio_traffic_get_num_stats(priv) + + mlx5e_grp_per_prio_pfc_get_num_stats(priv); +} + +static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); + return idx; +} + +static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); + return idx; +} + +static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } +} + static const struct counter_desc mlx5e_pme_status_desc[] = { { "module_unplug", 8 }, }; @@ -723,6 +956,11 @@ static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx + mlx5e_ipsec_get_stats(priv, data + idx); } +static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_ipsec_update_stats(priv); +} + static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, @@ -767,12 +1005,18 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, }; +static const struct counter_desc ch_stats_desc[] = { + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) +#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) { return (NUM_RQ_STATS * priv->channels.num) + + (NUM_CH_STATS * priv->channels.num) + (NUM_SQ_STATS * priv->channels.num * priv->channels.params.num_tc); } @@ -785,6 +1029,11 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; for (i = 0; i < priv->channels.num; i++) + for (j = 0; j < NUM_CH_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ch_stats_desc[j].format, i); + + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); @@ -808,6 +1057,12 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; for (i = 0; i < channels->num; i++) + for (j = 0; j < 
NUM_CH_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&channels->c[i]->stats, + ch_stats_desc, j); + + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, @@ -823,61 +1078,71 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +/* The stats groups order is opposite to the update_stats() order calls */ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { { .get_num_stats = mlx5e_grp_sw_get_num_stats, .fill_strings = mlx5e_grp_sw_fill_strings, .fill_stats = mlx5e_grp_sw_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_sw_update_stats, }, { .get_num_stats = mlx5e_grp_q_get_num_stats, .fill_strings = mlx5e_grp_q_fill_strings, .fill_stats = mlx5e_grp_q_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_q_update_stats, }, { .get_num_stats = mlx5e_grp_vport_get_num_stats, .fill_strings = mlx5e_grp_vport_fill_strings, .fill_stats = mlx5e_grp_vport_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_vport_update_stats, }, { .get_num_stats = mlx5e_grp_802_3_get_num_stats, .fill_strings = mlx5e_grp_802_3_fill_strings, .fill_stats = mlx5e_grp_802_3_fill_stats, + .update_stats_mask = MLX5E_NDO_UPDATE_STATS, + .update_stats = mlx5e_grp_802_3_update_stats, }, { .get_num_stats = mlx5e_grp_2863_get_num_stats, .fill_strings = mlx5e_grp_2863_fill_strings, .fill_stats = mlx5e_grp_2863_fill_stats, + .update_stats = mlx5e_grp_2863_update_stats, }, { .get_num_stats = mlx5e_grp_2819_get_num_stats, .fill_strings = mlx5e_grp_2819_fill_strings, .fill_stats = mlx5e_grp_2819_fill_stats, + .update_stats = mlx5e_grp_2819_update_stats, }, { .get_num_stats = mlx5e_grp_phy_get_num_stats, .fill_strings = mlx5e_grp_phy_fill_strings, .fill_stats = mlx5e_grp_phy_fill_stats, + .update_stats = mlx5e_grp_phy_update_stats, }, { .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, .fill_strings = mlx5e_grp_eth_ext_fill_strings, .fill_stats = mlx5e_grp_eth_ext_fill_stats, + .update_stats = mlx5e_grp_eth_ext_update_stats, }, { .get_num_stats = mlx5e_grp_pcie_get_num_stats, .fill_strings = mlx5e_grp_pcie_fill_strings, .fill_stats = mlx5e_grp_pcie_fill_stats, + .update_stats = mlx5e_grp_pcie_update_stats, }, { - .get_num_stats = mlx5e_grp_per_prio_traffic_get_num_stats, - .fill_strings = mlx5e_grp_per_prio_traffic_fill_strings, - .fill_stats = mlx5e_grp_per_prio_traffic_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_per_prio_pfc_get_num_stats, - .fill_strings = mlx5e_grp_per_prio_pfc_fill_strings, - .fill_stats = mlx5e_grp_per_prio_pfc_fill_stats, + .get_num_stats = mlx5e_grp_per_prio_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_fill_strings, + .fill_stats = mlx5e_grp_per_prio_fill_stats, + .update_stats = mlx5e_grp_per_prio_update_stats, }, { .get_num_stats = mlx5e_grp_pme_get_num_stats, @@ -888,6 +1153,7 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .get_num_stats = mlx5e_grp_ipsec_get_num_stats, .fill_strings = mlx5e_grp_ipsec_fill_strings, .fill_stats = mlx5e_grp_ipsec_fill_stats, + .update_stats = mlx5e_grp_ipsec_update_stats, }, { .get_num_stats = mlx5e_grp_channels_get_num_stats, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index d679e21f686e..0b3320a2b072 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -44,6 +44,7 @@ #define 
MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { char format[ETH_GSTRING_LEN]; @@ -88,6 +89,7 @@ struct mlx5e_sw_stats { u64 rx_cache_empty; u64 rx_cache_busy; u64 rx_cache_waive; + u64 ch_eq_rearm; /* Special handling counters */ u64 link_down_events_phy; @@ -192,6 +194,10 @@ struct mlx5e_sq_stats { u64 dropped; }; +struct mlx5e_ch_stats { + u64 eq_rearm; +}; + struct mlx5e_stats { struct mlx5e_sw_stats sw; struct mlx5e_qcounter_stats qcnt; @@ -201,11 +207,17 @@ struct mlx5e_stats { struct mlx5e_pcie_stats pcie; }; +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + struct mlx5e_priv; struct mlx5e_stats_grp { + u16 update_stats_mask; int (*get_num_stats)(struct mlx5e_priv *priv); int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); }; extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cf528da51243..fd98b0dc610f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,12 +51,14 @@ #include "en_tc.h" #include "eswitch.h" #include "vxlan.h" +#include "fs_core.h" struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; u32 mod_hdr_id; u32 hairpin_tirn; + struct mlx5_flow_table *hairpin_ft; }; enum { @@ -64,6 +66,7 @@ enum { MLX5E_TC_FLOW_NIC = BIT(1), MLX5E_TC_FLOW_OFFLOADED = BIT(2), MLX5E_TC_FLOW_HAIRPIN = BIT(3), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(4), }; struct mlx5e_tc_flow { @@ -100,8 +103,14 @@ struct mlx5e_hairpin { struct mlx5_hairpin *pair; struct mlx5_core_dev *func_mdev; + struct mlx5e_priv *func_priv; u32 tdn; u32 tirn; + + int num_channels; + struct mlx5e_rqt indir_rqt; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_ttc_table ttc; }; struct mlx5e_hairpin_entry { @@ -111,7 +120,8 @@ struct mlx5e_hairpin_entry { /* flows sharing the same hairpin */ struct list_head flows; - int peer_ifindex; + u16 peer_vhca_id; + u8 prio; struct mlx5e_hairpin *hp; }; @@ -268,7 +278,7 @@ static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); - MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn); + MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); MLX5_SET(tirc, tirc, transport_domain, hp->tdn); err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn); @@ -289,6 +299,151 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); } +static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) +{ + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; + struct mlx5e_priv *priv = hp->func_priv; + int i, ix, sz = MLX5E_INDIR_RQT_SIZE; + + mlx5e_build_default_indir_rqt(indirection_rqt, sz, + hp->num_channels); + + for (i = 0; i < sz; i++) { + ix = i; + if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + ix = indirection_rqt[ix]; + rqn = hp->pair->rqn[ix]; + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin 
*hp) +{ + int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5_core_dev *mdev = priv->mdev; + void *rqtc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + + err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); + if (!err) + hp->indir_rqt.enabled = true; + + kvfree(in); + return err; +} + +static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + u32 in[MLX5_ST_SZ_DW(create_tir_in)]; + int tt, i, err; + void *tirc; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + MLX5_SET(tirc, tirc, transport_domain, hp->tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + + err = mlx5_core_create_tir(hp->func_mdev, in, + MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); + if (err) { + mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_tirs; + } + } + return 0; + +err_destroy_tirs: + for (i = 0; i < tt; i++) + mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); + return err; +} + +static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); +} + +static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + + ttc_params->any_tt_tirn = hp->tirn; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; + + ft_attr->max_fte = MLX5E_NUM_TT; + ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_TC_PRIO; +} + +static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct ttc_params ttc_params; + int err; + + err = mlx5e_hairpin_create_indirect_rqt(hp); + if (err) + return err; + + err = mlx5e_hairpin_create_indirect_tirs(hp); + if (err) + goto err_create_indirect_tirs; + + mlx5e_hairpin_set_ttc_params(hp, &ttc_params); + err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); + if (err) + goto err_create_ttc_table; + + netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", + hp->num_channels, hp->ttc.ft.t->id); + + return 0; + +err_create_ttc_table: + mlx5e_hairpin_destroy_indirect_tirs(hp); +err_create_indirect_tirs: + mlx5e_destroy_rqt(priv, &hp->indir_rqt); + + return err; +} + +static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + + mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5e_hairpin_destroy_indirect_tirs(hp); + mlx5e_destroy_rqt(priv, &hp->indir_rqt); +} + static struct mlx5e_hairpin * mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, int peer_ifindex) @@ -312,13 +467,23 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params } hp->pair = pair; 
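
This is also why mlx5e_bits_invert() is promoted to a shared helper in en.h above: a multi-channel hairpin fills its RQT exactly the way the netdev fills its own indirection table, including the bit-reversed slot order required by the XOR RSS hash function. A condensed C form of the per-slot lookup done by mlx5e_hairpin_fill_rqt_rqns(), with a hypothetical helper name:

	static u32 sketch_hairpin_rqn_for_slot(struct mlx5e_hairpin *hp,
					       u32 *indirection_rqt,
					       int i, int sz)
	{
		struct mlx5e_priv *priv = hp->func_priv;
		int ix = i;

		if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
			ix = mlx5e_bits_invert(i, ilog2(sz));	/* bit-reversed slot */

		return hp->pair->rqn[indirection_rqt[ix]];	/* RQ of the pair */
	}
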
hp->func_mdev = func_mdev; + hp->func_priv = priv; + hp->num_channels = params->num_channels; err = mlx5e_hairpin_create_transport(hp); if (err) goto create_transport_err; + if (hp->num_channels > 1) { + err = mlx5e_hairpin_rss_init(hp); + if (err) + goto rss_init_err; + } + return hp; +rss_init_err: + mlx5e_hairpin_destroy_transport(hp); create_transport_err: mlx5_core_hairpin_destroy(hp->pair); create_pair_err: @@ -328,41 +493,96 @@ create_pair_err: static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) { + if (hp->num_channels > 1) + mlx5e_hairpin_rss_cleanup(hp); mlx5e_hairpin_destroy_transport(hp); mlx5_core_hairpin_destroy(hp->pair); kvfree(hp); } +static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) +{ + return (peer_vhca_id << 16 | prio); +} + static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, - int peer_ifindex) + u16 peer_vhca_id, u8 prio) { struct mlx5e_hairpin_entry *hpe; + u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, - hairpin_hlist, peer_ifindex) { - if (hpe->peer_ifindex == peer_ifindex) + hairpin_hlist, hash_key) { + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) return hpe; } return NULL; } +#define UNKNOWN_MATCH_PRIO 8 + +static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, u8 *match_prio) +{ + void *headers_c, *headers_v; + u8 prio_val, prio_mask = 0; + bool vlan_present; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { + netdev_warn(priv->netdev, + "only PCP trust state supported for hairpin\n"); + return -EOPNOTSUPP; + } +#endif + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + + vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); + if (vlan_present) { + prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + } + + if (!vlan_present || !prio_mask) { + prio_val = UNKNOWN_MATCH_PRIO; + } else if (prio_mask != 0x7) { + netdev_warn(priv->netdev, + "masked priority match not supported for hairpin\n"); + return -EOPNOTSUPP; + } + + *match_prio = prio_val; + return 0; +} + static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) { int peer_ifindex = parse_attr->mirred_ifindex; struct mlx5_hairpin_params params; + struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; struct mlx5e_hairpin *hp; + u64 link_speed64; + u32 link_speed; + u8 match_prio; + u16 peer_id; int err; - if (!MLX5_CAP_GEN(priv->mdev, hairpin)) { + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { netdev_warn(priv->netdev, "hairpin is not supported\n"); return -EOPNOTSUPP; } - hpe = mlx5e_hairpin_get(priv, peer_ifindex); + peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio); + if (err) + return err; + hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); if (hpe) goto attach_flow; @@ -371,14 +591,27 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, return -ENOMEM; INIT_LIST_HEAD(&hpe->flows); - hpe->peer_ifindex = peer_ifindex; + hpe->peer_vhca_id = peer_id; + hpe->prio = match_prio; params.log_data_size = 15; params.log_data_size = min_t(u8, 
params.log_data_size, MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); params.log_data_size = max_t(u8, params.log_data_size, MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz)); + + params.log_num_packets = params.log_data_size - + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); + params.log_num_packets = min_t(u8, params.log_num_packets, + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); + params.q_counter = priv->q_counter; + /* set one hairpin pair per 50Gbps share of the link */ + mlx5e_get_max_linkspeed(priv->mdev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + params.num_channels = link_speed64; hp = mlx5e_hairpin_create(priv, &params, peer_ifindex); if (IS_ERR(hp)) { @@ -386,16 +619,23 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, goto create_hairpin_err; } - netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x log data size %d\n", - hp->tirn, hp->pair->rqn, hp->pair->peer_mdev->priv.name, - hp->pair->sqn, params.log_data_size); + netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", + hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name, + hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_ifindex); + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); attach_flow: - flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + if (hpe->hp->num_channels > 1) { + flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + } else { + flow->nic_attr->hairpin_tirn = hpe->hp->tirn; + } list_add(&flow->hairpin, &hpe->flows); + return 0; create_hairpin_err: @@ -443,20 +683,24 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, bool table_created = false; int err, dest_ix = 0; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { - err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); - if (err) { - rule = ERR_PTR(err); - goto err_add_hairpin_flow; - } + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); + if (err) { + rule = ERR_PTR(err); + goto err_add_hairpin_flow; + } + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->hairpin_ft; + } else { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest[dest_ix].tir_num = attr->hairpin_tirn; - } else { - dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dest_ix].ft = priv->fs.vlan.ft.t; } dest_ix++; + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = priv->fs.vlan.ft.t; + dest_ix++; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { @@ -497,7 +741,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, MLX5E_TC_PRIO, tc_tbl_size, MLX5E_TC_TABLE_NUM_GROUPS, - 0, 0); + MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index e7e7cef2bde4..4d98ce0901af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -530,6 +530,24 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) return IRQ_HANDLED; }
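Note on the hairpin sizing in the hunk above: one send/receive queue pair is allocated per 50 Gbps share of the port speed, with a floor of one pair. A worked sketch of that arithmetic, written as plain userspace C rather than the driver's u64/do_div() form:

#include <stdint.h>

/* One hairpin channel per 50 Gbps of link speed (speed in Mbps). */
static uint32_t hairpin_num_channels(uint32_t link_speed_mbps)
{
	if (link_speed_mbps < 50000)
		link_speed_mbps = 50000;	/* floor: at least one channel */
	return link_speed_mbps / 50000;	/* e.g. 100000 Mbps -> 2 channels */
}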
+/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. + */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) +{ + u32 count_eqe; + + disable_irq(eq->irqn); + count_eqe = eq->cons_index; + mlx5_eq_int(eq->irqn, eq); + count_eqe = eq->cons_index - count_eqe; + enable_irq(eq->irqn); + + return count_eqe; +} + static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index cc4f6ab9374a..c025c98700e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -89,6 +89,9 @@ /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +#define KERNEL_NIC_TC_NUM_PRIOS 1 +#define KERNEL_NIC_TC_NUM_LEVELS 2 + #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) @@ -134,7 +137,7 @@ static struct init_tree_node { ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, ETHTOOL_PRIO_NUM_LEVELS))), ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(1, 1), + ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index ef1e787e6140..264504a990ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -241,7 +241,8 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - int err; + struct ttc_params ttc_params = {}; + int tt, err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -256,14 +257,23 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_inner_ttc_table(priv); + mlx5e_set_ttc_basic_params(priv, &ttc_params); + mlx5e_set_inner_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); if (err) { netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", err); goto err_destroy_arfs_tables; } - err = mlx5e_create_ttc_table(priv); + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -273,7 +283,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) return 0; err_destroy_inner_ttc_table: - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -282,8 +292,8 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_destroy_ttc_table(priv); - mlx5e_destroy_inner_ttc_table(priv); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); 
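Note on mlx5_eq_poll_irq_disabled() above: it is a last-resort recovery helper that masks the EQ's interrupt line, runs the handler once, and reports how many EQEs were consumed by comparing consumer indices before and after. The generic shape of that pattern, with hypothetical helpers standing in for the kernel primitives:

#include <stdint.h>

struct event_queue {
	int irqn;
	uint32_t cons_index;
};

void mask_irq(int irqn);	/* stand-in for disable_irq() */
void unmask_irq(int irqn);	/* stand-in for enable_irq() */
void handle_events(struct event_queue *eq);	/* stand-in for the EQ handler */

static uint32_t poll_while_masked(struct event_queue *eq)
{
	uint32_t before;

	mask_irq(eq->irqn);
	before = eq->cons_index;
	handle_events(eq);	/* consumes any pending events */
	unmask_irq(eq->irqn);
	return eq->cons_index - before;	/* events recovered by polling */
}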
mlx5e_arfs_destroy_tables(priv); } @@ -485,7 +495,7 @@ static int mlx5i_close(struct net_device *netdev) mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); mlx5i_uninit_underlay_qp(epriv); mlx5e_deactivate_priv_channels(epriv); - mlx5e_close_channels(&epriv->channels);; + mlx5e_close_channels(&epriv->channels); unlock: mutex_unlock(&epriv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index ff4a0b889a6f..b5a46c128b28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -116,6 +116,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a09ebbaf3b68..9e38343a951f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -413,6 +413,7 @@ static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev, MLX5_SET(rqc, rqc, counter_set_id, params->q_counter); MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn); } @@ -430,6 +431,7 @@ static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn); } @@ -437,28 +439,40 @@ static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp, struct mlx5_hairpin_params *params) { - int err; + int i, j, err; - err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn); - if (err) - goto out_err_rq; + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]); + if (err) + goto out_err_rq; + } - err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn); - if (err) - goto out_err_sq; + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]); + if (err) + goto out_err_sq; + } return 0; out_err_sq: - mlx5_core_destroy_rq(hp->func_mdev, hp->rqn); + for (j = 0; j < i; j++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]); + i = hp->num_channels; out_err_rq: + for (j = 0; j < i; j++) + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]); return err; } static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) { - mlx5_core_destroy_rq(hp->func_mdev, hp->rqn); - mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn); + int i; + + for (i = 0; i < hp->num_channels; i++) { + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + } } static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn, @@ -505,41 +519,53 @@ static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn, static int 
mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp) { - int err; + int i, j, err; - /* set peer SQ */ - err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn, - MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, - MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn); - if (err) - goto err_modify_sq; - - /* set func RQ */ - err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn, - MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, - MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn); + /* set peer SQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], + MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]); + if (err) + goto err_modify_sq; + } - if (err) - goto err_modify_rq; + /* set func RQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], + MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, + MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]); + if (err) + goto err_modify_rq; + } return 0; err_modify_rq: - mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RST, 0, 0); + for (j = 0; j < i; j++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + i = hp->num_channels; err_modify_sq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); return err; } static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) { - /* unset func RQ */ - mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn, MLX5_RQC_STATE_RDY, - MLX5_RQC_STATE_RST, 0, 0); + int i; + + /* unset func RQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); - /* unset peer SQ */ - mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RST, 0, 0); + /* unset peer SQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); } struct mlx5_hairpin * @@ -550,13 +576,17 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev, struct mlx5_hairpin *hp; int size, err; - size = sizeof(*hp); + size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32); hp = kzalloc(size, GFP_KERNEL); if (!hp) return ERR_PTR(-ENOMEM); hp->func_mdev = func_mdev; hp->peer_mdev = peer_mdev; + hp->num_channels = params->num_channels; + + hp->rqn = (void *)hp + sizeof(*hp); + hp->sqn = hp->rqn + params->num_channels; /* alloc and pair func --> peer hairpin */ err = mlx5_hairpin_create_queues(hp, params); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 6a979a09ab72..b698fb481b2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -310,9 +310,33 @@ struct mlxsw_afa_block { struct mlxsw_afa_set *first_set; struct mlxsw_afa_set *cur_set; unsigned int cur_act_index; /* In current set. */ - struct list_head fwd_entry_ref_list; + struct list_head resource_list; /* List of resources held by actions + * in this block. 
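Note on the allocation in mlx5_core_hairpin_create() above: the hairpin object and its two queue-number arrays come from a single allocation, sized with num_channels * 2 trailing u32s, with rqn and sqn pointed into that tail. A standalone sketch of the layout, using illustrative types rather than the driver's:

#include <stdint.h>
#include <stdlib.h>

struct hairpin {
	int num_channels;
	uint32_t *rqn;	/* points just past the struct */
	uint32_t *sqn;	/* follows the rqn array */
};

static struct hairpin *hairpin_alloc(int num_channels)
{
	struct hairpin *hp;

	hp = calloc(1, sizeof(*hp) + num_channels * 2 * sizeof(uint32_t));
	if (!hp)
		return NULL;
	hp->num_channels = num_channels;
	hp->rqn = (uint32_t *)(hp + 1);
	hp->sqn = hp->rqn + num_channels;
	return hp;	/* a single free(hp) releases everything */
}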
+ */ }; +struct mlxsw_afa_resource { + struct list_head list; + void (*destructor)(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource); +}; + +static void mlxsw_afa_resource_add(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + list_add(&resource->list, &block->resource_list); +} + +static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_resource *resource, *tmp; + + list_for_each_entry_safe(resource, tmp, &block->resource_list, list) { + list_del(&resource->list); + resource->destructor(block, resource); + } +} + struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa) { struct mlxsw_afa_block *block; @@ -320,7 +344,7 @@ struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa) block = kzalloc(sizeof(*block), GFP_KERNEL); if (!block) return NULL; - INIT_LIST_HEAD(&block->fwd_entry_ref_list); + INIT_LIST_HEAD(&block->resource_list); block->afa = mlxsw_afa; /* At least one action set is always present, so just create it here */ @@ -336,8 +360,6 @@ err_first_set_create: } EXPORT_SYMBOL(mlxsw_afa_block_create); -static void mlxsw_afa_fwd_entry_refs_destroy(struct mlxsw_afa_block *block); - void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block) { struct mlxsw_afa_set *set = block->first_set; @@ -348,7 +370,7 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block) mlxsw_afa_set_put(block->afa, set); set = next_set; } while (set); - mlxsw_afa_fwd_entry_refs_destroy(block); + mlxsw_afa_resources_destroy(block); kfree(block); } EXPORT_SYMBOL(mlxsw_afa_block_destroy); @@ -489,10 +511,29 @@ static void mlxsw_afa_fwd_entry_put(struct mlxsw_afa *mlxsw_afa, } struct mlxsw_afa_fwd_entry_ref { - struct list_head list; + struct mlxsw_afa_resource resource; struct mlxsw_afa_fwd_entry *fwd_entry; }; +static void +mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) +{ + mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); + kfree(fwd_entry_ref); +} + +static void +mlxsw_afa_fwd_entry_ref_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + + fwd_entry_ref = container_of(resource, struct mlxsw_afa_fwd_entry_ref, + resource); + mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); +} + static struct mlxsw_afa_fwd_entry_ref * mlxsw_afa_fwd_entry_ref_create(struct mlxsw_afa_block *block, u8 local_port) { @@ -509,7 +550,8 @@ mlxsw_afa_fwd_entry_ref_create(struct mlxsw_afa_block *block, u8 local_port) goto err_fwd_entry_get; } fwd_entry_ref->fwd_entry = fwd_entry; - list_add(&fwd_entry_ref->list, &block->fwd_entry_ref_list); + fwd_entry_ref->resource.destructor = mlxsw_afa_fwd_entry_ref_destructor; + mlxsw_afa_resource_add(block, &fwd_entry_ref->resource); return fwd_entry_ref; err_fwd_entry_get: @@ -517,23 +559,51 @@ err_fwd_entry_get: return ERR_PTR(err); } +struct mlxsw_afa_counter { + struct mlxsw_afa_resource resource; + u32 counter_index; +}; + static void -mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, - struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) +mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_counter *counter) { - list_del(&fwd_entry_ref->list); - mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); - kfree(fwd_entry_ref); + block->afa->ops->counter_index_put(block->afa->ops_priv, + counter->counter_index); + kfree(counter); +} + +static void 
+mlxsw_afa_counter_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_counter *counter; + + counter = container_of(resource, struct mlxsw_afa_counter, resource); + mlxsw_afa_counter_destroy(block, counter); } -static void mlxsw_afa_fwd_entry_refs_destroy(struct mlxsw_afa_block *block) +static struct mlxsw_afa_counter * +mlxsw_afa_counter_create(struct mlxsw_afa_block *block) { - struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; - struct mlxsw_afa_fwd_entry_ref *tmp; + struct mlxsw_afa_counter *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = block->afa->ops->counter_index_get(block->afa->ops_priv, + &counter->counter_index); + if (err) + goto err_counter_index_get; + counter->resource.destructor = mlxsw_afa_counter_destructor; + mlxsw_afa_resource_add(block, &counter->resource); + return counter; - list_for_each_entry_safe(fwd_entry_ref, tmp, - &block->fwd_entry_ref_list, list) - mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); +err_counter_index_get: + kfree(counter); + return ERR_PTR(err); } #define MLXSW_AFA_ONE_ACTION_LEN 32 @@ -690,6 +760,16 @@ MLXSW_ITEM32(afa, trapdisc, forward_action, 0x00, 0, 4); */ MLXSW_ITEM32(afa, trapdisc, trap_id, 0x04, 0, 9); +/* afa_trapdisc_mirror_agent + * Mirror agent. + */ +MLXSW_ITEM32(afa, trapdisc, mirror_agent, 0x08, 29, 3); + +/* afa_trapdisc_mirror_enable + * Mirror enable. + */ +MLXSW_ITEM32(afa, trapdisc, mirror_enable, 0x08, 24, 1); + static inline void mlxsw_afa_trapdisc_pack(char *payload, enum mlxsw_afa_trapdisc_trap_action trap_action, @@ -701,6 +781,14 @@ mlxsw_afa_trapdisc_pack(char *payload, mlxsw_afa_trapdisc_trap_id_set(payload, trap_id); } +static inline void +mlxsw_afa_trapdisc_mirror_pack(char *payload, bool mirror_enable, + u8 mirror_agent) +{ + mlxsw_afa_trapdisc_mirror_enable_set(payload, mirror_enable); + mlxsw_afa_trapdisc_mirror_agent_set(payload, mirror_agent); +} + int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) { char *act = mlxsw_afa_block_append_action(block, @@ -746,6 +834,104 @@ int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, } EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); +struct mlxsw_afa_mirror { + struct mlxsw_afa_resource resource; + int span_id; + u8 local_in_port; + u8 local_out_port; + bool ingress; +}; + +static void +mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_mirror *mirror) +{ + block->afa->ops->mirror_del(block->afa->ops_priv, + mirror->local_in_port, + mirror->local_out_port, + mirror->ingress); + kfree(mirror); +} + +static void +mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_mirror *mirror; + + mirror = container_of(resource, struct mlxsw_afa_mirror, resource); + mlxsw_afa_mirror_destroy(block, mirror); +} + +static struct mlxsw_afa_mirror * +mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, + u8 local_in_port, u8 local_out_port, + bool ingress) +{ + struct mlxsw_afa_mirror *mirror; + int err; + + mirror = kzalloc(sizeof(*mirror), GFP_KERNEL); + if (!mirror) + return ERR_PTR(-ENOMEM); + + err = block->afa->ops->mirror_add(block->afa->ops_priv, + local_in_port, local_out_port, + ingress, &mirror->span_id); + if (err) + goto err_mirror_add; + + mirror->ingress = ingress; + mirror->local_out_port = local_out_port; + mirror->local_in_port = local_in_port; + mirror->resource.destructor = mlxsw_afa_mirror_destructor; + 
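Note on the resource scheme introduced above: it generalizes the old fwd_entry_ref list. Every action that grabs a resource (fwd entry, counter, mirror) embeds a small struct mlxsw_afa_resource carrying a destructor, and block destruction walks the list invoking each one, recovering the outer object via container_of(). A compact standalone sketch of the idiom, with illustrative names:

#include <stddef.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct resource {
	struct resource *next;
	void (*destructor)(struct resource *res);
};

struct counter_res {
	struct resource res;		/* embedded header */
	unsigned int counter_index;
};

static void counter_destructor(struct resource *res)
{
	struct counter_res *c = container_of(res, struct counter_res, res);

	/* release c->counter_index with its allocator here */
	free(c);
}

static void resources_destroy(struct resource *head)
{
	while (head) {
		struct resource *next = head->next;

		head->destructor(head);
		head = next;
	}
}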
mlxsw_afa_resource_add(block, &mirror->resource); + return mirror; + +err_mirror_add: + kfree(mirror); + return ERR_PTR(err); +} + +static int +mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, + u8 mirror_agent) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); + mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); + return 0; +} + +int +mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, + u8 local_in_port, u8 local_out_port, bool ingress) +{ + struct mlxsw_afa_mirror *mirror; + int err; + + mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port, + ingress); + if (IS_ERR(mirror)) + return PTR_ERR(mirror); + + err = mlxsw_afa_block_append_allocated_mirror(block, mirror->span_id); + if (err) + goto err_append_allocated_mirror; + + return 0; + +err_append_allocated_mirror: + mlxsw_afa_mirror_destroy(block, mirror); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_mirror); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) @@ -853,11 +1039,10 @@ mlxsw_afa_polcnt_pack(char *payload, mlxsw_afa_polcnt_counter_index_set(payload, counter_index); } -int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, - u32 counter_index) +int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, + u32 counter_index) { - char *act = mlxsw_afa_block_append_action(block, - MLXSW_AFA_POLCNT_CODE, + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE, MLXSW_AFA_POLCNT_SIZE); if (!act) return -ENOBUFS; @@ -865,6 +1050,32 @@ int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, counter_index); return 0; } +EXPORT_SYMBOL(mlxsw_afa_block_append_allocated_counter); + +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 *p_counter_index) +{ + struct mlxsw_afa_counter *counter; + u32 counter_index; + int err; + + counter = mlxsw_afa_counter_create(block); + if (IS_ERR(counter)) + return PTR_ERR(counter); + counter_index = counter->counter_index; + + err = mlxsw_afa_block_append_allocated_counter(block, counter_index); + if (err) + goto err_append_allocated_counter; + + if (p_counter_index) + *p_counter_index = counter_index; + return 0; + +err_append_allocated_counter: + mlxsw_afa_counter_destroy(block, counter); + return err; +} EXPORT_SYMBOL(mlxsw_afa_block_append_counter); /* Virtual Router and Forwarding Domain Action diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index a8d3314c3a24..43132293475c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -46,6 +46,12 @@ struct mlxsw_afa_ops { void (*kvdl_set_del)(void *priv, u32 kvdl_index, bool is_first); int (*kvdl_fwd_entry_add)(void *priv, u32 *p_kvdl_index, u8 local_port); void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index); + int (*counter_index_get)(void *priv, unsigned int *p_counter_index); + void (*counter_index_put)(void *priv, unsigned int counter_index); + int (*mirror_add)(void *priv, u8 local_in_port, u8 local_out_port, + bool ingress, int *p_span_id); + void (*mirror_del)(void *priv, u8 local_in_port, u8 local_out_port, + bool ingress); }; struct mlxsw_afa *mlxsw_afa_create(unsigned
int max_acts_per_set, @@ -63,12 +69,17 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, u16 trap_id); +int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, + u8 local_in_port, u8 local_out_port, + bool ingress); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, u16 vid, u8 pcp, u8 et); +int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, + u32 counter_index); int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, - u32 counter_index); + u32 *p_counter_index); int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid); int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, u16 expected_irif, u16 min_mtu, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index bbe48917dcad..833cd0a96fd9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -76,12 +76,7 @@ #define MLXSW_FWREV_MAJOR 13 #define MLXSW_FWREV_MINOR 1530 #define MLXSW_FWREV_SUBMINOR 152 - -static const struct mlxsw_fw_rev mlxsw_sp_supported_fw_rev = { - .major = MLXSW_FWREV_MAJOR, - .minor = MLXSW_FWREV_MINOR, - .subminor = MLXSW_FWREV_SUBMINOR -}; +#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) #define MLXSW_SP_FW_FILENAME \ "mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \ @@ -339,28 +334,25 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, return mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); } -static bool mlxsw_sp_fw_rev_ge(const struct mlxsw_fw_rev *a, - const struct mlxsw_fw_rev *b) -{ - if (a->major != b->major) - return a->major > b->major; - if (a->minor != b->minor) - return a->minor > b->minor; - return a->subminor >= b->subminor; -} - static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) { const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev; const struct firmware *firmware; int err; - if (mlxsw_sp_fw_rev_ge(rev, &mlxsw_sp_supported_fw_rev)) + /* Validate driver & FW are compatible */ + if (rev->major != MLXSW_FWREV_MAJOR) { + WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", + rev->major, MLXSW_FWREV_MAJOR); + return -EINVAL; + } + if (MLXSW_FWREV_MINOR_TO_BRANCH(rev->minor) == + MLXSW_FWREV_MINOR_TO_BRANCH(MLXSW_FWREV_MINOR)) return 0; - dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is out of date\n", + dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", rev->major, rev->minor, rev->subminor); - dev_info(mlxsw_sp->bus_info->dev, "Upgrading firmware using file %s\n", + dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", MLXSW_SP_FW_FILENAME); err = request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME, @@ -576,7 +568,7 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, span_entry->used = false; } -static struct mlxsw_sp_span_entry * +struct mlxsw_sp_span_entry * mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) { int i; @@ -677,13 +669,28 @@ mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, static int mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, struct mlxsw_sp_span_entry *span_entry, - 
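Note on the firmware validation rework above: the old "greater or equal" rule is replaced by a branch check. The major number must match exactly, and the minor numbers must fall in the same hundred-wide branch (minor / 100), so a 13.1530.152 driver is compatible with any 13.15xx firmware but not with a 13.14xx one. A sketch of the predicate:

/* Returns 1 when driver and firmware are considered compatible. */
static int fw_rev_compatible(int drv_major, int drv_minor,
			     int fw_major, int fw_minor)
{
	if (fw_major != drv_major)
		return 0;			/* never cross majors */
	return (fw_minor / 100) == (drv_minor / 100);	/* same branch */
}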
enum mlxsw_sp_span_type type) + enum mlxsw_sp_span_type type, + bool bind) { - struct mlxsw_sp_span_inspected_port *inspected_port; struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; char mpar_pl[MLXSW_REG_MPAR_LEN]; - char sbib_pl[MLXSW_REG_SBIB_LEN]; int pa_id = span_entry->id; + + /* bind the port to the SPAN entry */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, bind, pa_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); +} + +static int +mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; int err; /* if it is an egress SPAN, bind a shared buffer to it */ @@ -699,12 +706,12 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, } } - /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, - (enum mlxsw_reg_mpar_i_e) type, true, pa_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); - if (err) - goto err_mpar_reg_write; + if (bind) { + err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + true); + if (err) + goto err_port_bind; + } inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); if (!inspected_port) { @@ -717,8 +724,11 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, return 0; -err_mpar_reg_write: err_inspected_port_alloc: + if (bind) + mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + false); +err_port_bind: if (type == MLXSW_SP_SPAN_EGRESS) { mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); @@ -727,25 +737,22 @@ err_inspected_port_alloc: } static void -mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type) +mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) { struct mlxsw_sp_span_inspected_port *inspected_port; struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char mpar_pl[MLXSW_REG_MPAR_LEN]; char sbib_pl[MLXSW_REG_SBIB_LEN]; - int pa_id = span_entry->id; inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); if (!inspected_port) return; - /* remove the inspected port */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, - (enum mlxsw_reg_mpar_i_e) type, false, pa_id); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); - + if (bind) + mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + false); /* remove the SBIB buffer if it was egress SPAN */ if (type == MLXSW_SP_SPAN_EGRESS) { mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); @@ -758,9 +765,9 @@ mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, kfree(inspected_port); } -static int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type) +int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type, bool bind) { struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; struct mlxsw_sp_span_entry *span_entry; @@ -773,7 +780,7 @@ static int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", span_entry->id); - err = mlxsw_sp_span_inspected_port_bind(from, span_entry, type); + err = 
mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind); if (err) goto err_port_bind; @@ -784,9 +791,8 @@ err_port_bind: return err; } -static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, - u8 destination_port, - enum mlxsw_sp_span_type type) +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, + enum mlxsw_sp_span_type type, bool bind) { struct mlxsw_sp_span_entry *span_entry; @@ -799,7 +805,7 @@ static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", span_entry->id); - mlxsw_sp_span_inspected_port_unbind(from, span_entry, type); + mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); } static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1590,7 +1596,8 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, mirror->to_local_port = to_port->local_port; mirror->ingress = ingress; span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type); + return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type, + true); } static void @@ -1601,8 +1608,8 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, span_type = mirror->ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - mlxsw_sp_span_mirror_remove(mlxsw_sp_port, mirror->to_local_port, - span_type); + mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port, + span_type, true); } static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 58ff79211c09..bdd8f94a452c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -396,6 +396,16 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); +int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type, + bool bind); +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, + u8 destination_port, + enum mlxsw_sp_span_type type, + bool bind); +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port); /* spectrum_dcb.c */ #ifdef CONFIG_MLXSW_SPECTRUM_DCB @@ -457,7 +467,6 @@ struct mlxsw_sp_acl_rule_info { struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; unsigned int counter_index; - bool counter_valid; }; enum mlxsw_sp_acl_profile { @@ -545,6 +554,10 @@ int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, u16 group_id); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_block *block, + struct net_device *out_dev); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct net_device *out_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 9439bfa4ecc2..0897a5435cc2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -462,27 +462,6 @@ u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset) return ops->ruleset_group_id(ruleset->priv); } -static int -mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_rule_info *rulei) -{ - int err; - - err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &rulei->counter_index); - if (err) - return err; - rulei->counter_valid = true; - return 0; -} - -static void -mlxsw_sp_acl_rulei_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_rule_info *rulei) -{ - rulei->counter_valid = false; - mlxsw_sp_flow_counter_free(mlxsw_sp, rulei->counter_index); -} - struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) { @@ -587,6 +566,34 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port, in_port); } +int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_acl_block *block, + struct net_device *out_dev) +{ + struct mlxsw_sp_acl_block_binding *binding; + struct mlxsw_sp_port *out_port; + struct mlxsw_sp_port *in_port; + + if (!list_is_singular(&block->binding_list)) + return -EOPNOTSUPP; + + binding = list_first_entry(&block->binding_list, + struct mlxsw_sp_acl_block_binding, list); + in_port = binding->mlxsw_sp_port; + if (!mlxsw_sp_port_dev_check(out_dev)) + return -EINVAL; + + out_port = netdev_priv(out_dev); + if (out_port->mlxsw_sp != mlxsw_sp) + return -EINVAL; + + return mlxsw_afa_block_append_mirror(rulei->act_block, + in_port->local_port, + out_port->local_port, + binding->ingress); +} + int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 action, u16 vid, u16 proto, u8 prio) @@ -619,7 +626,7 @@ int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) { return mlxsw_afa_block_append_counter(rulei->act_block, - rulei->counter_index); + &rulei->counter_index); } int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, @@ -653,13 +660,8 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, goto err_rulei_create; } - err = mlxsw_sp_acl_rulei_counter_alloc(mlxsw_sp, rule->rulei); - if (err) - goto err_counter_alloc; return rule; -err_counter_alloc: - mlxsw_sp_acl_rulei_destroy(rule->rulei); err_rulei_create: kfree(rule); err_alloc: @@ -672,7 +674,6 @@ void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; - mlxsw_sp_acl_rulei_counter_free(mlxsw_sp, rule->rulei); mlxsw_sp_acl_rulei_destroy(rule->rulei); kfree(rule); mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c index 4d3340ed0291..6ca6894125f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -108,11 +108,77 @@ static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); } +static int +mlxsw_sp_act_counter_index_get(void *priv, unsigned int *p_counter_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_flow_counter_alloc(mlxsw_sp, p_counter_index); +} + +static void +mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_flow_counter_free(mlxsw_sp, counter_index); +} + +static int 
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port, + bool ingress, int *p_span_id) +{ + struct mlxsw_sp_port *in_port, *out_port; + struct mlxsw_sp_span_entry *span_entry; + struct mlxsw_sp *mlxsw_sp = priv; + enum mlxsw_sp_span_type type; + int err; + + type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + out_port = mlxsw_sp->ports[local_out_port]; + in_port = mlxsw_sp->ports[local_in_port]; + + err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false); + if (err) + return err; + + span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port); + if (!span_entry) { + err = -ENOENT; + goto err_span_entry_find; + } + + *p_span_id = span_entry->id; + return 0; + +err_span_entry_find: + mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false); + return err; +} + +static void +mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port, + bool ingress) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *in_port; + enum mlxsw_sp_span_type type; + + type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + in_port = mlxsw_sp->ports[local_in_port]; + + mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false); +} + static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, + .counter_index_get = mlxsw_sp_act_counter_index_get, + .counter_index_put = mlxsw_sp_act_counter_index_put, + .mirror_add = mlxsw_sp_act_mirror_add, + .mirror_del = mlxsw_sp_act_mirror_del, }; int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index cf7b97d40d78..6ce00e28d4ea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -108,6 +108,13 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, out_dev); if (err) return err; + } else if (is_tcf_mirred_egress_mirror(a)) { + struct net_device *out_dev = tcf_mirred_dev(a); + + err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, + block, out_dev); + if (err) + return err; } else if (is_tcf_vlan(a)) { u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); u32 action = tcf_vlan_action(a); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index cfacc176a1bd..55f9d2d70f9e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -286,7 +286,7 @@ static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_kvdl_part_fini(mlxsw_sp, i); } -u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part) +static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part) { unsigned int nr_entries; int bit = -1; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 34a0b632e5dd..4c7f32d4288d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -243,7 +243,8 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, if (!afa_block) return ERR_PTR(-ENOMEM); - err = mlxsw_afa_block_append_counter(afa_block, counter_index); + err = mlxsw_afa_block_append_allocated_counter(afa_block, + counter_index); if (err) goto 
err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 01ff5ba6796e..f0b25baba09a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -71,6 +71,7 @@ #include "spectrum_mr_tcam.h" #include "spectrum_router.h" +struct mlxsw_sp_fib; struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; @@ -84,6 +85,8 @@ struct mlxsw_sp_router { struct rhashtable nexthop_ht; struct list_head nexthop_list; struct { + /* One tree for each protocol: IPv4 and IPv6 */ + struct mlxsw_sp_lpm_tree *proto_trees[2]; struct mlxsw_sp_lpm_tree *trees; unsigned int tree_count; } lpm; @@ -162,6 +165,15 @@ struct mlxsw_sp_rif_ops { struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif); }; +static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree); +static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree); +static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib *fib, + u8 tree_id); +static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib *fib); + static unsigned int * mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) @@ -349,14 +361,6 @@ mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); } -static bool -mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage) -{ - struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } }; - - return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none); -} - static void mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, struct mlxsw_sp_prefix_usage *prefix_usage2) @@ -398,7 +402,6 @@ enum mlxsw_sp_fib_entry_type { }; struct mlxsw_sp_nexthop_group; -struct mlxsw_sp_fib; struct mlxsw_sp_fib_node { struct list_head entry_list; @@ -445,6 +448,7 @@ struct mlxsw_sp_lpm_tree { u8 id; /* tree ID */ unsigned int ref_count; enum mlxsw_sp_l3proto proto; + unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; }; @@ -453,8 +457,6 @@ struct mlxsw_sp_fib { struct list_head node_list; struct mlxsw_sp_vr *vr; struct mlxsw_sp_lpm_tree *lpm_tree; - unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; - struct mlxsw_sp_prefix_usage prefix_usage; enum mlxsw_sp_l3proto proto; }; @@ -469,12 +471,15 @@ struct mlxsw_sp_vr { static const struct rhashtable_params mlxsw_sp_fib_ht_params; -static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr, +static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { + struct mlxsw_sp_lpm_tree *lpm_tree; struct mlxsw_sp_fib *fib; int err; + lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto]; fib = kzalloc(sizeof(*fib), GFP_KERNEL); if (!fib) return ERR_PTR(-ENOMEM); @@ -484,17 +489,26 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr, INIT_LIST_HEAD(&fib->node_list); fib->proto = proto; fib->vr = vr; + fib->lpm_tree = lpm_tree; + mlxsw_sp_lpm_tree_hold(lpm_tree); + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id); + if (err) + goto err_lpm_tree_bind; return fib; +err_lpm_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); err_rhashtable_init: kfree(fib); return ERR_PTR(err); } -static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) +static 
void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib) { + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); + mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree); WARN_ON(!list_empty(&fib->node_list)); - WARN_ON(fib->lpm_tree); rhashtable_destroy(&fib->ht); kfree(fib); } @@ -581,6 +595,9 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, goto err_left_struct_set; memcpy(&lpm_tree->prefix_usage, prefix_usage, sizeof(lpm_tree->prefix_usage)); + memset(&lpm_tree->prefix_ref_count, 0, + sizeof(lpm_tree->prefix_ref_count)); + lpm_tree->ref_count = 1; return lpm_tree; err_left_struct_set: @@ -607,8 +624,10 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, if (lpm_tree->ref_count != 0 && lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, - prefix_usage)) + prefix_usage)) { + mlxsw_sp_lpm_tree_hold(lpm_tree); return lpm_tree; + } } return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); } @@ -629,9 +648,10 @@ static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; struct mlxsw_sp_lpm_tree *lpm_tree; u64 max_trees; - int i; + int err, i; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES)) return -EIO; @@ -649,11 +669,42 @@ static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; } + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_ipv4_tree_get; + } + mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree; + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_ipv6_tree_get; + } + mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree; + return 0; + +err_ipv6_tree_get: + lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4]; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); +err_ipv4_tree_get: + kfree(mlxsw_sp->router->lpm.trees); + return err; } static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_lpm_tree *lpm_tree; + + lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6]; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + + lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4]; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + kfree(mlxsw_sp->router->lpm.trees); } @@ -745,10 +796,10 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); return ERR_PTR(-EBUSY); } - vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); + vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); - vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6); + vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); if (IS_ERR(vr->fib6)) { err = PTR_ERR(vr->fib6); goto err_fib6_create; @@ -763,21 +814,22 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, return vr; err_mr_table_create: - mlxsw_sp_fib_destroy(vr->fib6); + mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6); vr->fib6 = NULL; err_fib6_create: - mlxsw_sp_fib_destroy(vr->fib4); + mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4); vr->fib4 = NULL; return ERR_PTR(err); } -static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) +static void 
mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) { mlxsw_sp_mr_table_destroy(vr->mr4_table); vr->mr4_table = NULL; - mlxsw_sp_fib_destroy(vr->fib6); + mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6); vr->fib6 = NULL; - mlxsw_sp_fib_destroy(vr->fib4); + mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4); vr->fib4 = NULL; } @@ -793,12 +845,12 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, return vr; } -static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) +static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) { if (!vr->rif_count && list_empty(&vr->fib4->node_list) && list_empty(&vr->fib6->node_list) && mlxsw_sp_mr_table_empty(vr->mr4_table)) - mlxsw_sp_vr_destroy(vr); + mlxsw_sp_vr_destroy(mlxsw_sp, vr); } static bool @@ -809,7 +861,7 @@ mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr, if (!mlxsw_sp_vr_is_used(vr)) return false; - if (fib->lpm_tree && fib->lpm_tree->id == tree_id) + if (fib->lpm_tree->id == tree_id) return true; return false; } @@ -821,27 +873,31 @@ static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; int err; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); - if (err) - return err; fib->lpm_tree = new_tree; mlxsw_sp_lpm_tree_hold(new_tree); + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + goto err_tree_bind; mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); return 0; + +err_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + fib->lpm_tree = old_tree; + return err; } static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, struct mlxsw_sp_lpm_tree *new_tree) { - struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; enum mlxsw_sp_l3proto proto = fib->proto; + struct mlxsw_sp_lpm_tree *old_tree; u8 old_id, new_id = new_tree->id; struct mlxsw_sp_vr *vr; int i, err; - if (!old_tree) - goto no_replace; + old_tree = mlxsw_sp->router->lpm.proto_trees[proto]; old_id = old_tree->id; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { @@ -855,6 +911,11 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, goto err_tree_replace; } + memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count, + sizeof(new_tree->prefix_ref_count)); + mlxsw_sp->router->lpm.proto_trees[proto] = new_tree; + mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); + return 0; err_tree_replace: @@ -866,33 +927,6 @@ err_tree_replace: old_tree); } return err; - -no_replace: - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); - if (err) - return err; - fib->lpm_tree = new_tree; - mlxsw_sp_lpm_tree_hold(new_tree); - return 0; -} - -static void -mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_l3proto proto, - struct mlxsw_sp_prefix_usage *req_prefix_usage) -{ - int i; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; - struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); - unsigned char prefix; - - if (!mlxsw_sp_vr_is_used(vr)) - continue; - mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage) - mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix); - } } static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) @@ -1934,11 +1968,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, dipn = htonl(dip); dev = mlxsw_sp->router->rifs[rif]->dev; n = neigh_lookup(&arp_tbl, &dipn, dev); - if (!n) { - netdev_err(dev, "Failed to find matching 
neighbour for IP=%pI4h\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); neigh_event_send(n, NULL); @@ -1965,11 +1996,8 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, dev = mlxsw_sp->router->rifs[rif]->dev; n = neigh_lookup(&nd_tbl, &dip, dev); - if (!n) { - netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); neigh_event_send(n, NULL); @@ -4193,68 +4221,66 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, } static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib *fib, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_fib *fib = fib_node->fib; struct mlxsw_sp_lpm_tree *lpm_tree; int err; - /* Since the tree is shared between all virtual routers we must - * make sure it contains all the required prefix lengths. This - * can be computed by either adding the new prefix length to the - * existing prefix usage of a bound tree, or by aggregating the - * prefix lengths across all virtual routers and adding the new - * one as well. - */ - if (fib->lpm_tree) - mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, - &fib->lpm_tree->prefix_usage); - else - mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto]; + if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0) + goto out; + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, fib->proto); if (IS_ERR(lpm_tree)) return PTR_ERR(lpm_tree); - if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id) - return 0; - err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); if (err) - return err; + goto err_lpm_tree_replace; +out: + lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++; return 0; -} -static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib *fib) -{ - if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) - return; - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); - mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree); - fib->lpm_tree = NULL; +err_lpm_tree_replace: + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + return err; } -static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) +static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) { - unsigned char prefix_len = fib_node->key.prefix_len; + struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; + struct mlxsw_sp_prefix_usage req_prefix_usage; struct mlxsw_sp_fib *fib = fib_node->fib; + int err; - if (fib->prefix_ref_count[prefix_len]++ == 0) - mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); -} + if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0) + return; + /* Try to construct a new LPM tree from the current prefix usage + * minus the unused one. If we fail, continue using the old one. 
+ */ + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage); + mlxsw_sp_prefix_usage_clear(&req_prefix_usage, + fib_node->key.prefix_len); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return; -static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node) -{ - unsigned char prefix_len = fib_node->key.prefix_len; - struct mlxsw_sp_fib *fib = fib_node->fib; + err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + if (err) + goto err_lpm_tree_replace; - if (--fib->prefix_ref_count[prefix_len] == 0) - mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + return; + +err_lpm_tree_replace: + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); } static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, @@ -4268,12 +4294,10 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, return err; fib_node->fib = fib; - err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node); + err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node); if (err) goto err_fib_lpm_tree_link; - mlxsw_sp_fib_node_prefix_inc(fib_node); - return 0; err_fib_lpm_tree_link: @@ -4287,8 +4311,7 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_fib *fib = fib_node->fib; - mlxsw_sp_fib_node_prefix_dec(fib_node); - mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib); + mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node); fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); } @@ -4327,7 +4350,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, err_fib_node_init: mlxsw_sp_fib_node_destroy(fib_node); err_fib_node_create: - mlxsw_sp_vr_put(vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); return ERR_PTR(err); } @@ -4340,7 +4363,7 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, return; mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); - mlxsw_sp_vr_put(vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); } static struct mlxsw_sp_fib4_entry * @@ -5363,7 +5386,7 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, return; mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc); - mlxsw_sp_vr_put(vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); } static int @@ -5400,7 +5423,7 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, return; mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index); - mlxsw_sp_vr_put(vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); } static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) @@ -6049,7 +6072,7 @@ err_fid_get: err_rif_alloc: err_rif_index_alloc: vr->rif_count--; - mlxsw_sp_vr_put(vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); return ERR_PTR(err); } @@ -6072,7 +6095,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_fid_put(fid); kfree(rif); vr->rif_count--; - mlxsw_sp_vr_put(vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); } static void @@ -6862,7 +6885,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) return 0; err_loopback_op: - mlxsw_sp_vr_put(ul_vr); + mlxsw_sp_vr_put(mlxsw_sp, ul_vr); return err; } @@ -6876,7 +6899,7 @@ static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); --ul_vr->rif_count; - mlxsw_sp_vr_put(ul_vr); + mlxsw_sp_vr_put(mlxsw_sp, ul_vr); } static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 064f00e23a19..d5866d708dfa 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ 
b/drivers/net/ethernet/netronome/nfp/Makefile @@ -22,6 +22,7 @@ nfp-objs := \ nfp_hwmon.o \ nfp_main.o \ nfp_net_common.o \ + nfp_net_ctrl.o \ nfp_net_debugdump.o \ nfp_net_ethtool.o \ nfp_net_main.o \ diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 71e6586acc36..80d3aa0fc9d3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -157,7 +157,14 @@ nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type, int tag) { struct sk_buff *skb; - int err; + int i, err; + + for (i = 0; i < 50; i++) { + udelay(4); + skb = nfp_bpf_reply(bpf, tag); + if (skb) + return skb; + } err = wait_event_interruptible_timeout(bpf->cmsg_wq, skb = nfp_bpf_reply(bpf, tag), diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 8823c8360047..b3206855535a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -54,7 +54,7 @@ static bool nfp_net_ebpf_capable(struct nfp_net *nn) static int nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) + struct bpf_prog *prog, struct netlink_ext_ack *extack) { bool running, xdp_running; int ret; @@ -70,10 +70,10 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, if (prog && running && !xdp_running) return -EBUSY; - ret = nfp_net_bpf_offload(nn, prog, running); + ret = nfp_net_bpf_offload(nn, prog, running, extack); /* Stop offload if replace not possible */ if (ret && prog) - nfp_bpf_xdp_offload(app, nn, NULL); + nfp_bpf_xdp_offload(app, nn, NULL, extack); nn->dp.bpf_offload_xdp = prog && !ret; return ret; @@ -125,17 +125,31 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, struct nfp_bpf_vnic *bv; int err; - if (type != TC_SETUP_CLSBPF || - !tc_can_offload(nn->dp.netdev) || - !nfp_net_ebpf_capable(nn) || - cls_bpf->common.protocol != htons(ETH_P_ALL) || - cls_bpf->common.chain_index) + if (type != TC_SETUP_CLSBPF) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only offload of BPF classifiers supported"); + return -EOPNOTSUPP; + } + if (!tc_can_offload_extack(nn->dp.netdev, cls_bpf->common.extack)) + return -EOPNOTSUPP; + if (!nfp_net_ebpf_capable(nn)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "NFP firmware does not support eBPF offload"); + return -EOPNOTSUPP; + } + if (cls_bpf->common.protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only ETH_P_ALL supported as filter protocol"); + return -EOPNOTSUPP; + } + if (cls_bpf->common.chain_index) return -EOPNOTSUPP; /* Only support TC direct action */ if (!cls_bpf->exts_integrated || tcf_exts_has_actions(cls_bpf->exts)) { - nn_err(nn, "only direct action with no legacy actions supported\n"); + NL_SET_ERR_MSG_MOD(cls_bpf->common.extack, + "only direct action with no legacy actions supported"); return -EOPNOTSUPP; } @@ -152,7 +166,8 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, return 0; } - err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog); + err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog, + cls_bpf->common.extack); if (err) return err; @@ -389,6 +404,8 @@ const struct nfp_app_type app_bpf = { .id = NFP_APP_BPF_NIC, .name = "ebpf", + .ctrl_cap_mask = 0, + .init = nfp_bpf_init, .clean = nfp_bpf_clean, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index c476bca15ba4..424fe8338105 100644 --- 
a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -335,7 +335,7 @@ struct nfp_net; int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf); int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, - bool old_prog); + bool old_prog, struct netlink_ext_ack *extack); struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index e2859b2e9c6a..0a7732385469 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -127,6 +127,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int stack_size; unsigned int max_instr; + int err; stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; if (prog->aux->stack_depth > stack_size) { @@ -143,7 +144,14 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) if (!nfp_prog->prog) return -ENOMEM; - return nfp_bpf_jit(nfp_prog); + err = nfp_bpf_jit(nfp_prog); + if (err) + return err; + + prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); + prog->aux->offload->jited_image = nfp_prog->prog; + + return 0; } static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) @@ -168,6 +176,8 @@ nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap, static int nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) { + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) + return -EINVAL; return nfp_bpf_ctrl_del_entry(offmap, key); } @@ -271,7 +281,9 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) } } -static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) +static int +nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int max_mtu; @@ -281,7 +293,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; if (max_mtu < nn->dp.netdev->mtu) { - nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary"); return -EOPNOTSUPP; } @@ -303,7 +315,8 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) /* Load up the JITed code */ err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); if (err) - nn_err(nn, "FW command error while loading BPF: %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "FW command error while loading BPF"); dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); @@ -312,7 +325,8 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) return err; } -static void nfp_net_bpf_start(struct nfp_net *nn) +static void +nfp_net_bpf_start(struct nfp_net *nn, struct netlink_ext_ack *extack) { int err; @@ -321,7 +335,8 @@ static void nfp_net_bpf_start(struct nfp_net *nn) nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); if (err) - nn_err(nn, "FW command error while enabling BPF: %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "FW command error while enabling BPF"); } static int nfp_net_bpf_stop(struct nfp_net *nn) @@ -336,7 +351,7 @@ static int 
nfp_net_bpf_stop(struct nfp_net *nn) } int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, - bool old_prog) + bool old_prog, struct netlink_ext_ack *extack) { int err; @@ -354,7 +369,8 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP); if (!(cap & NFP_NET_BPF_CAP_RELO)) { - nn_err(nn, "FW does not support live reload\n"); + NL_SET_ERR_MSG_MOD(extack, + "FW does not support live reload"); return -EBUSY; } } @@ -366,12 +382,12 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, if (old_prog && !prog) return nfp_net_bpf_stop(nn); - err = nfp_net_bpf_load(nn, prog); + err = nfp_net_bpf_load(nn, prog, extack); if (err) return err; if (!old_prog) - nfp_net_bpf_start(nn); + nfp_net_bpf_start(nn, extack); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 615314d9e7c6..baaea6f1a9d8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -211,12 +211,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); - if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) { - nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n", - cmsg_hdr->version); - goto out; - } - type = cmsg_hdr->type; switch (type) { case NFP_FLOWER_CMSG_TYPE_PORT_REIFY: @@ -225,9 +219,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_PORT_MOD: nfp_flower_cmsg_portmod_rx(app, skb); break; - case NFP_FLOWER_CMSG_TYPE_FLOW_STATS: - nfp_flower_rx_flow_stats(app, skb); - break; case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: nfp_tunnel_request_route(app, skb); break; @@ -263,7 +254,23 @@ void nfp_flower_cmsg_process_rx(struct work_struct *work) void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) { struct nfp_flower_priv *priv = app->priv; + struct nfp_flower_cmsg_hdr *cmsg_hdr; + + cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); + + if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) { + nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n", + cmsg_hdr->version); + dev_kfree_skb_any(skb); + return; + } - skb_queue_tail(&priv->cmsg_skbs, skb); - schedule_work(&priv->cmsg_work); + if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_FLOW_STATS) { + /* We need to deal with stats updates from HW asap */ + nfp_flower_rx_flow_stats(app, skb); + dev_consume_skb_any(skb); + } else { + skb_queue_tail(&priv->cmsg_skbs, skb); + schedule_work(&priv->cmsg_work); + } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 67c406815365..742d6f1575b5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -99,7 +99,7 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id) if (port >= reprs->num_reprs) return NULL; - return reprs->reprs[port]; + return rcu_dereference(reprs->reprs[port]); } static int @@ -114,15 +114,19 @@ nfp_flower_reprs_reify(struct nfp_app *app, enum nfp_repr_type type, if (!reprs) return 0; - for (i = 0; i < reprs->num_reprs; i++) - if (reprs->reprs[i]) { - struct nfp_repr *repr = netdev_priv(reprs->reprs[i]); + for (i = 0; i < reprs->num_reprs; i++) { + struct net_device *netdev; + + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) { + struct nfp_repr *repr = netdev_priv(netdev); err = nfp_flower_cmsg_portreify(repr, exists); if 
(err) return err; count++; } + } return count; } @@ -234,19 +238,21 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, return -ENOMEM; for (i = 0; i < cnt; i++) { + struct net_device *repr; struct nfp_port *port; u32 port_id; - reprs->reprs[i] = nfp_repr_alloc(app); - if (!reprs->reprs[i]) { + repr = nfp_repr_alloc(app); + if (!repr) { err = -ENOMEM; goto err_reprs_clean; } + RCU_INIT_POINTER(reprs->reprs[i], repr); /* For now we only support 1 PF */ WARN_ON(repr_type == NFP_REPR_TYPE_PF && i); - port = nfp_port_alloc(app, port_type, reprs->reprs[i]); + port = nfp_port_alloc(app, port_type, repr); if (repr_type == NFP_REPR_TYPE_PF) { port->pf_id = i; port->vnic = priv->nn->dp.ctrl_bar; @@ -257,11 +263,11 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, app->pf->vf_cfg_mem + i * NFP_NET_CFG_BAR_SZ; } - eth_hw_addr_random(reprs->reprs[i]); + eth_hw_addr_random(repr); port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type, i, queue); - err = nfp_repr_init(app, reprs->reprs[i], + err = nfp_repr_init(app, repr, port_id, port, priv->nn->dp.netdev); if (err) { nfp_port_free(port); @@ -270,7 +276,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, nfp_info(app->cpp, "%s%d Representor(%s) created\n", repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i, - reprs->reprs[i]->name); + repr->name); } nfp_app_reprs_set(app, repr_type, reprs); @@ -291,7 +297,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, err_reprs_remove: reprs = nfp_app_reprs_set(app, repr_type, NULL); err_reprs_clean: - nfp_reprs_clean_and_free(reprs); + nfp_reprs_clean_and_free(app, reprs); return err; } @@ -329,17 +335,18 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) for (i = 0; i < eth_tbl->count; i++) { unsigned int phys_port = eth_tbl->ports[i].index; + struct net_device *repr; struct nfp_port *port; u32 cmsg_port_id; - reprs->reprs[phys_port] = nfp_repr_alloc(app); - if (!reprs->reprs[phys_port]) { + repr = nfp_repr_alloc(app); + if (!repr) { err = -ENOMEM; goto err_reprs_clean; } + RCU_INIT_POINTER(reprs->reprs[phys_port], repr); - port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, - reprs->reprs[phys_port]); + port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr); if (IS_ERR(port)) { err = PTR_ERR(port); goto err_reprs_clean; @@ -350,11 +357,11 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) goto err_reprs_clean; } - SET_NETDEV_DEV(reprs->reprs[phys_port], &priv->nn->pdev->dev); + SET_NETDEV_DEV(repr, &priv->nn->pdev->dev); nfp_net_get_mac_addr(app->pf, port); cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port); - err = nfp_repr_init(app, reprs->reprs[phys_port], + err = nfp_repr_init(app, repr, cmsg_port_id, port, priv->nn->dp.netdev); if (err) { nfp_port_free(port); @@ -367,7 +374,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) phys_port); nfp_info(app->cpp, "Phys Port %d Representor(%s) created\n", - phys_port, reprs->reprs[phys_port]->name); + phys_port, repr->name); } nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); @@ -397,7 +404,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) err_reprs_remove: reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, NULL); err_reprs_clean: - nfp_reprs_clean_and_free(reprs); + nfp_reprs_clean_and_free(app, reprs); err_free_ctrl_skb: kfree_skb(ctrl_skb); return err; @@ -558,6 +565,8 @@ static void nfp_flower_stop(struct nfp_app *app) const struct nfp_app_type app_flower = { .id = NFP_APP_FLOWER_NIC, .name = "flower", + + .ctrl_cap_mask = ~0U, 
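+ /* unlike app_bpf above, flower keeps every ctrl vNIC capability */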
.ctrl_has_meta = true, .extra_cap = nfp_flower_extra_cap, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 955a9f44d244..6aedef0ad433 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -32,6 +32,8 @@ */ #include <linux/bug.h> +#include <linux/lockdep.h> +#include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> @@ -99,13 +101,19 @@ nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size, gfp_t priority) } struct nfp_reprs * +nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type) +{ + return rcu_dereference_protected(app->reprs[type], + lockdep_is_held(&app->pf->lock)); +} + +struct nfp_reprs * nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type, struct nfp_reprs *reprs) { struct nfp_reprs *old; - old = rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + old = nfp_reprs_get_locked(app, type); rcu_assign_pointer(app->reprs[type], reprs); return old; @@ -116,7 +124,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) struct nfp_app *app; if (id >= ARRAY_SIZE(apps) || !apps[id]) { - nfp_err(pf->cpp, "failed to find app with ID 0x%02hhx\n", id); + nfp_err(pf->cpp, "unknown FW app ID 0x%02hhx, driver too old or support for FW not built in\n", id); return ERR_PTR(-EINVAL); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 6a6eb02b516e..437964afa8ee 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -43,6 +43,7 @@ struct bpf_prog; struct net_device; struct netdev_bpf; +struct netlink_ext_ack; struct pci_dev; struct sk_buff; struct sk_buff; @@ -66,6 +67,9 @@ extern const struct nfp_app_type app_flower; * struct nfp_app_type - application definition * @id: application ID * @name: application name + * @ctrl_cap_mask: ctrl vNIC capability mask, allows disabling features like + * IRQMOD which are on by default but counter-productive for + * control messages which are often latency-sensitive * @ctrl_has_meta: control messages have prepend of type:5/port:CTRL * * Callbacks @@ -100,6 +104,7 @@ struct nfp_app_type { enum nfp_app_id id; const char *name; + u32 ctrl_cap_mask; bool ctrl_has_meta; int (*init)(struct nfp_app *app); @@ -134,7 +139,8 @@ struct nfp_app_type { int (*bpf)(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *xdp); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); + struct bpf_prog *prog, + struct netlink_ext_ack *extack); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -320,11 +326,12 @@ static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn, } static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) + struct bpf_prog *prog, + struct netlink_ext_ack *extack) { if (!app || !app->type->xdp_offload) return -EOPNOTSUPP; - return app->type->xdp_offload(app, nn, prog); + return app->type->xdp_offload(app, nn, prog, extack); } static inline bool __nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) @@ -385,6 +392,8 @@ static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id) struct nfp_app *nfp_app_from_netdev(struct net_device *netdev); struct nfp_reprs * +nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type); +struct nfp_reprs * nfp_app_reprs_set(struct 
nfp_app *app, enum nfp_repr_type type, struct nfp_reprs *reprs); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 6c9f29c2e975..eb0fc614673d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -152,18 +152,8 @@ out: static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct nfp_pf *pf = devlink_priv(devlink); - int ret; - - mutex_lock(&pf->lock); - if (!pf->app) { - ret = -EBUSY; - goto out; - } - ret = nfp_app_eswitch_mode_get(pf->app, mode); -out: - mutex_unlock(&pf->lock); - return ret; + return nfp_app_eswitch_mode_get(pf->app, mode); } const struct devlink_ops nfp_devlink_ops = { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 0953fa8f3109..cc570bb6563c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -499,13 +499,9 @@ static int nfp_pci_probe(struct pci_dev *pdev, if (err) goto err_hwinfo_free; - err = devlink_register(devlink, &pdev->dev); - if (err) - goto err_hwinfo_free; - err = nfp_nsp_init(pdev, pf); if (err) - goto err_devlink_unreg; + goto err_hwinfo_free; pf->mip = nfp_mip_open(pf->cpp); pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip); @@ -522,6 +518,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "Error: %d VFs already enabled, but loaded FW can only support %d\n", pf->num_vfs, pf->limit_vfs); + err = -EINVAL; goto err_fw_unload; } @@ -549,8 +546,6 @@ err_fw_unload: kfree(pf->eth_tbl); kfree(pf->nspi); vfree(pf->dumpspec); -err_devlink_unreg: - devlink_unregister(devlink); err_hwinfo_free: kfree(pf->hwinfo); nfp_cpp_free(pf->cpp); @@ -571,18 +566,13 @@ err_pci_disable: static void nfp_pci_remove(struct pci_dev *pdev) { struct nfp_pf *pf = pci_get_drvdata(pdev); - struct devlink *devlink; nfp_hwmon_unregister(pf); - devlink = priv_to_devlink(pf); - - nfp_net_pci_remove(pf); - nfp_pcie_sriov_disable(pdev); pci_sriov_set_totalvfs(pf->pdev, 0); - devlink_unregister(devlink); + nfp_net_pci_remove(pf); vfree(pf->dumpspec); kfree(pf->rtbl); @@ -598,7 +588,7 @@ static void nfp_pci_remove(struct pci_dev *pdev) kfree(pf->eth_tbl); kfree(pf->nspi); mutex_destroy(&pf->lock); - devlink_free(devlink); + devlink_free(priv_to_devlink(pf)); pci_release_regions(pdev); pci_disable_device(pdev); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 6f6e3d6fd935..d88eda9707e6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -578,6 +578,7 @@ struct nfp_net_dp { * @qcp_cfg: Pointer to QCP queue used for configuration notification * @tx_bar: Pointer to mapped TX queues * @rx_bar: Pointer to mapped FL/RX queues + * @tlv_caps: Parsed TLV capabilities * @debugfs_dir: Device directory in debugfs * @vnic_list: Entry on device vNIC list * @pdev: Backpointer to PCI device @@ -644,6 +645,8 @@ struct nfp_net { u8 __iomem *tx_bar; u8 __iomem *rx_bar; + struct nfp_net_tlv_caps tlv_caps; + struct dentry *debugfs_dir; struct list_head vnic_list; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 2b5cad3069a7..c0fd351c86b1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -293,9 +293,15 @@ int nfp_net_reconfig(struct nfp_net 
*nn, u32 update) */ static int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd) { + u32 mbox = nn->tlv_caps.mbox_off; int ret; - nn_writeq(nn, NFP_NET_CFG_MBOX_CMD, mbox_cmd); + if (!nfp_net_has_mbox(&nn->tlv_caps)) { + nn_err(nn, "no mailbox present, command: %u\n", mbox_cmd); + return -EIO; + } + + nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX); if (ret) { @@ -303,7 +309,7 @@ static int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd) return ret; } - return -nn_readl(nn, NFP_NET_CFG_MBOX_RET); + return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); } /* Interrupt configuration and handling @@ -2458,7 +2464,7 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn) * ME timestamp ticks. There are 16 ME clock cycles for each timestamp * count. */ - factor = nn->me_freq_mhz / 16; + factor = nn->tlv_caps.me_freq_mhz / 16; /* copy RX interrupt coalesce parameters */ value = (nn->rx_coalesce_max_frames << 16) | @@ -3084,8 +3090,9 @@ nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) if (!vid) return 0; - nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_VID, vid); - nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_PROTO, ETH_P_8021Q); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO, + ETH_P_8021Q); return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD); } @@ -3101,8 +3108,9 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) if (!vid) return 0; - nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_VID, vid); - nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_PROTO, ETH_P_8021Q); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO, + ETH_P_8021Q); return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL); } @@ -3395,7 +3403,7 @@ nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags, if (err) return err; - err = nfp_app_xdp_offload(nn->app, nn, offload_prog); + err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack); if (err && flags & XDP_FLAGS_HW_MODE) return err; @@ -3748,18 +3756,8 @@ static void nfp_net_netdev_init(struct nfp_net *nn) nfp_net_set_ethtool_ops(netdev); } -/** - * nfp_net_init() - Initialise/finalise the nfp_net structure - * @nn: NFP Net device structure - * - * Return: 0 on success or negative errno on error. - */ -int nfp_net_init(struct nfp_net *nn) +static int nfp_net_read_caps(struct nfp_net *nn) { - int err; - - nn->dp.rx_dma_dir = DMA_FROM_DEVICE; - /* Get some of the read-only fields from the BAR */ nn->cap = nn_readl(nn, NFP_NET_CFG_CAP); nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU); @@ -3792,6 +3790,29 @@ int nfp_net_init(struct nfp_net *nn) nn->dp.rx_offset = NFP_NET_RX_OFFSET; } + /* For control vNICs mask out the capabilities app doesn't want. */ + if (!nn->dp.netdev) + nn->cap &= nn->app->type->ctrl_cap_mask; + + return 0; +} + +/** + * nfp_net_init() - Initialise/finalise the nfp_net structure + * @nn: NFP Net device structure + * + * Return: 0 on success or negative errno on error. 
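+ *
+ * Reads the vNIC capabilities (masked for control vNICs), sets the
+ * default MTU and freelist buffer size, and parses the control BAR
+ * TLV area before netdev setup.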
+ */ +int nfp_net_init(struct nfp_net *nn) +{ + int err; + + nn->dp.rx_dma_dir = DMA_FROM_DEVICE; + + err = nfp_net_read_caps(nn); + if (err) + return err; + /* Set default MTU and Freelist buffer size */ if (nn->max_mtu < NFP_NET_DEFAULT_MTU) nn->dp.mtu = nn->max_mtu; @@ -3815,6 +3836,11 @@ int nfp_net_init(struct nfp_net *nn) nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD; } + err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, + &nn->tlv_caps); + if (err) + return err; + if (nn->dp.netdev) nfp_net_netdev_init(nn); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c new file mode 100644 index 000000000000..ffb402746ad4 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/bitfield.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps) +{ + memset(caps, 0, sizeof(*caps)); + caps->me_freq_mhz = 1200; + caps->mbox_off = NFP_NET_CFG_MBOX_BASE; + caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ; +} + +int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, + struct nfp_net_tlv_caps *caps) +{ + u8 __iomem *data = ctrl_mem + NFP_NET_CFG_TLV_BASE; + u8 __iomem *end = ctrl_mem + NFP_NET_CFG_BAR_SZ; + u32 hdr; + + nfp_net_tlv_caps_reset(caps); + + hdr = readl(data); + if (!hdr) + return 0; + + while (true) { + unsigned int length, offset; + u32 hdr = readl(data); + + length = FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr); + offset = data - ctrl_mem + NFP_NET_CFG_TLV_BASE; + + /* Advance past the header */ + data += 4; + + if (length % NFP_NET_CFG_TLV_LENGTH_INC) { + dev_err(dev, "TLV size not multiple of %u len:%u\n", + NFP_NET_CFG_TLV_LENGTH_INC, length); + return -EINVAL; + } + if (data + length > end) { + dev_err(dev, "oversized TLV offset:%u len:%u\n", + offset, length); + return -EINVAL; + } + + switch (FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr)) { + case NFP_NET_CFG_TLV_TYPE_UNKNOWN: + dev_err(dev, "NULL TLV at offset:%u\n", offset); + return -EINVAL; + case NFP_NET_CFG_TLV_TYPE_RESERVED: + break; + case NFP_NET_CFG_TLV_TYPE_END: + if (!length) + return 0; + + dev_err(dev, "END TLV should be empty, has len:%d\n", + length); + return -EINVAL; + case NFP_NET_CFG_TLV_TYPE_ME_FREQ: + if (length != 4) { + dev_err(dev, + "ME FREQ TLV should be 4B, is %dB\n", + length); + return -EINVAL; + } + + caps->me_freq_mhz = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_MBOX: + if (!length) { + caps->mbox_off = 0; + caps->mbox_len = 0; + } else { + caps->mbox_off = data - ctrl_mem; + caps->mbox_len = length; + } + break; + default: + if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) + break; + + dev_err(dev, "unknown TLV type:%u offset:%u len:%u\n", + FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr), + offset, length); + return -EINVAL; + } + + data += length; + if (data + 4 > end) { + dev_err(dev, "reached end of BAR without END TLV\n"); + return -EINVAL; + } + } + + /* Not reached */ + return -EINVAL; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 25c36001bffa..eeecef2caac6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -43,9 +43,7 @@ #ifndef _NFP_NET_CTRL_H_ #define _NFP_NET_CTRL_H_ -/* IMPORTANT: This header file is shared with the FW, - * no OS specific constructs, please! - */ +#include <linux/types.h> /** * Configuration BAR size. 
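
For reference, the per-entry work in nfp_net_tlv_caps_parse() above reduces to decoding one 32-bit header word. A minimal stand-alone sketch (plain C, hypothetical helper name; the mask values are taken from the NFP_NET_CFG_TLV_HEADER_* defines in the next hunk):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the driver's FIELD_GET() calls:
     * bit 31 is the "required" flag, bits 30-16 the type and bits 15-0
     * the value length (the 4 byte header itself is not counted).
     */
    static void tlv_hdr_decode(uint32_t hdr, bool *required,
                               unsigned int *type, unsigned int *length)
    {
        *required = hdr & 0x80000000u; /* NFP_NET_CFG_TLV_HEADER_REQUIRED */
        *type = (hdr >> 16) & 0x7fff;  /* NFP_NET_CFG_TLV_HEADER_TYPE */
        *length = hdr & 0xffff;        /* NFP_NET_CFG_TLV_HEADER_LENGTH */
    }

An END TLV would then decode as type 2 with length 0, matching the termination check in the parser.
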
@@ -236,6 +234,12 @@ #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 /** + * TLV area start + * %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area + */ +#define NFP_NET_CFG_TLV_BASE 0x0058 + +/** * VXLAN/UDP encap configuration * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes @@ -409,11 +413,14 @@ * 4B used for update command and 4B return code * followed by a max of 504B of variable length value */ -#define NFP_NET_CFG_MBOX_CMD 0x1800 -#define NFP_NET_CFG_MBOX_RET 0x1804 -#define NFP_NET_CFG_MBOX_VAL 0x1808 +#define NFP_NET_CFG_MBOX_BASE 0x1800 #define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8 +#define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0 +#define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4 +#define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8 +#define NFP_NET_CFG_MBOX_SIMPLE_LEN 0x12 + #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2 @@ -424,9 +431,87 @@ * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter * %NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes */ -#define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_VAL +#define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_SIMPLE_VAL #define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER #define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2) #define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004 +/** + * TLV capabilities + * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV + * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV + * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV + * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments + * %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV + * + * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE. + * Last structure must be of type %NFP_NET_CFG_TLV_TYPE_END. Presence of TLVs + * is indicated by %NFP_NET_CFG_TLV_BASE being non-zero. TLV structures may + * fill the entire remainder of the BAR or be shorter. FW must make sure TLVs + * don't conflict with other features which allocate space beyond + * %NFP_NET_CFG_TLV_BASE. %NFP_NET_CFG_TLV_TYPE_RESERVED should be used to wrap + * space used by such features. + * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH. + */ +#define NFP_NET_CFG_TLV_TYPE 0x00 +#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000 +#define NFP_NET_CFG_TLV_LENGTH 0x02 +#define NFP_NET_CFG_TLV_LENGTH_INC 4 +#define NFP_NET_CFG_TLV_VALUE 0x04 + +#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000 +#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 +#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff + +/** + * Capability TLV types + * + * %NFP_NET_CFG_TLV_TYPE_UNKNOWN: + * Special TLV type to catch bugs, should never be encountered. Drivers should + * treat encountering this type as error and refuse to probe. + * + * %NFP_NET_CFG_TLV_TYPE_RESERVED: + * Reserved space, may contain legacy fixed-offset fields, or be used for + * padding. The use of this type should be otherwise avoided. + * + * %NFP_NET_CFG_TLV_TYPE_END: + * Empty, end of TLV list. Must be the last TLV. Drivers will stop processing + * further TLVs when encountered. + * + * %NFP_NET_CFG_TLV_TYPE_ME_FREQ: + * Single word, ME frequency in MHz as used in calculation for + * %NFP_NET_CFG_RXR_IRQ_MOD and %NFP_NET_CFG_TXR_IRQ_MOD. + * + * %NFP_NET_CFG_TLV_TYPE_MBOX: + * Variable, mailbox area. Overwrites the default location which is + * %NFP_NET_CFG_MBOX_BASE and length %NFP_NET_CFG_MBOX_VAL_MAX_SZ. 
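+ *
+ * A zero-length MBOX TLV indicates no mailbox is available; the driver
+ * then clears mbox_off/mbox_len and mailbox commands fail with -EIO.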
+ */ +#define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 +#define NFP_NET_CFG_TLV_TYPE_RESERVED 1 +#define NFP_NET_CFG_TLV_TYPE_END 2 +#define NFP_NET_CFG_TLV_TYPE_ME_FREQ 3 +#define NFP_NET_CFG_TLV_TYPE_MBOX 4 + +struct device; + +/** + * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities + * @me_freq_mhz: ME clock_freq (MHz) + * @mbox_off: vNIC mailbox area offset + * @mbox_len: vNIC mailbox area length + */ +struct nfp_net_tlv_caps { + u32 me_freq_mhz; + unsigned int mbox_off; + unsigned int mbox_len; +}; + +int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, + struct nfp_net_tlv_caps *caps); + +static inline bool nfp_net_has_mbox(struct nfp_net_tlv_caps *caps) +{ + return caps->mbox_len >= NFP_NET_CFG_MBOX_SIMPLE_LEN; +} + #endif /* _NFP_NET_CTRL_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c index 173646e17e94..bb8ed460086e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c @@ -518,16 +518,15 @@ nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr, max_rd_addr = cpp_rd_addr + be32_to_cpu(spec_csr->cpp.dump_length); while (cpp_rd_addr < max_rd_addr) { - if (is_xpb_read(&spec_csr->cpp.cpp_id)) - bytes_read = nfp_xpb_readl(pf->cpp, cpp_rd_addr, - (u32 *)dest); - else + if (is_xpb_read(&spec_csr->cpp.cpp_id)) { + err = nfp_xpb_readl(pf->cpp, cpp_rd_addr, (u32 *)dest); + } else { bytes_read = nfp_cpp_read(pf->cpp, cpp_id, cpp_rd_addr, dest, reg_sz); - if (bytes_read != reg_sz) { - if (bytes_read >= 0) - bytes_read = -EIO; - dump_header->error = cpu_to_be32(bytes_read); + err = bytes_read == reg_sz ? 0 : -EIO; + } + if (err) { + dump_header->error = cpu_to_be32(err); dump_header->error_offset = cpu_to_be32(cpp_rd_addr); break; } @@ -555,8 +554,8 @@ nfp_read_indirect_csr(struct nfp_cpp *cpp, NFP_IND_ME_REFL_WR_SIG_INIT, cpp_params.token, cpp_params.island); result = nfp_cpp_writel(cpp, cpp_id, csr_ctx_ptr_offs, context); - if (result != sizeof(context)) - return result < 0 ? 
result : -EIO; + if (result) + return result; cpp_id = nfp_get_numeric_cpp_id(&cpp_params); result = nfp_cpp_read(cpp, cpp_id, csr_ctx_ptr_offs, dest, reg_sz); @@ -641,8 +640,8 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, struct nfp_dump_rtsym *dump_header = dump->p; struct nfp_dumpspec_cpp_isl_id cpp_params; struct nfp_rtsym_table *rtbl = pf->rtbl; + u32 header_size, total_size, sym_size; const struct nfp_rtsym *sym; - u32 header_size, total_size; u32 tl_len, key_len; int bytes_read; u32 cpp_id; @@ -658,9 +657,14 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, if (!sym) return nfp_dump_error_tlv(&spec->tl, -ENOENT, dump); + if (sym->type == NFP_RTSYM_TYPE_ABS) + sym_size = sizeof(sym->addr); + else + sym_size = sym->size; + header_size = ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1); - total_size = header_size + ALIGN8(sym->size); + total_size = header_size + ALIGN8(sym_size); dest = dump->p + header_size; err = nfp_add_tlv(be32_to_cpu(spec->tl.type), total_size, dump); @@ -670,9 +674,9 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, dump_header->padded_name_length = header_size - offsetof(struct nfp_dump_rtsym, rtsym); memcpy(dump_header->rtsym, spec->rtsym, key_len + 1); + dump_header->cpp.dump_length = cpu_to_be32(sym_size); if (sym->type == NFP_RTSYM_TYPE_ABS) { - dump_header->cpp.dump_length = cpu_to_be32(sizeof(sym->addr)); *(u64 *)dest = sym->addr; } else { cpp_params.target = sym->target; @@ -682,10 +686,9 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec, cpp_id = nfp_get_numeric_cpp_id(&cpp_params); dump_header->cpp.cpp_id = cpp_params; dump_header->cpp.offset = cpu_to_be32(sym->addr); - dump_header->cpp.dump_length = cpu_to_be32(sym->size); bytes_read = nfp_cpp_read(pf->cpp, cpp_id, sym->addr, dest, - sym->size); - if (bytes_read != sym->size) { + sym_size); + if (bytes_read != sym_size) { if (bytes_read >= 0) bytes_read = -EIO; dump_header->error = cpu_to_be32(bytes_read); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index c505014121c4..15fa47f622aa 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -208,12 +208,6 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id) { int err; - /* Get ME clock frequency from ctrl BAR - * XXX for now frequency is hardcoded until we figure out how - * to get the value from nfp-hwinfo into ctrl bar - */ - nn->me_freq_mhz = 1200; - err = nfp_net_init(nn); if (err) return err; @@ -373,7 +367,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) if (IS_ERR(pf->app)) return PTR_ERR(pf->app); + mutex_lock(&pf->lock); err = nfp_app_init(pf->app); + mutex_unlock(&pf->lock); if (err) goto err_free; @@ -401,7 +397,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) err_unmap: nfp_cpp_area_release_free(pf->ctrl_vnic_bar); err_app_clean: + mutex_lock(&pf->lock); nfp_app_clean(pf->app); + mutex_unlock(&pf->lock); err_free: nfp_app_free(pf->app); pf->app = NULL; @@ -414,7 +412,11 @@ static void nfp_net_pf_app_clean(struct nfp_pf *pf) nfp_net_pf_free_vnic(pf, pf->ctrl_vnic); nfp_cpp_area_release_free(pf->ctrl_vnic_bar); } + + mutex_lock(&pf->lock); nfp_app_clean(pf->app); + mutex_unlock(&pf->lock); + nfp_app_free(pf->app); pf->app = NULL; } @@ -570,17 +572,6 @@ err_unmap_ctrl: return err; } -static void 
nfp_net_pci_remove_finish(struct nfp_pf *pf) -{ - nfp_net_pf_app_stop(pf); - /* stop app first, to avoid double free of ctrl vNIC's ddir */ - nfp_net_debugfs_dir_clean(&pf->ddir); - - nfp_net_pf_free_irqs(pf); - nfp_net_pf_app_clean(pf); - nfp_net_pci_unmap_mem(pf); -} - static int nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, struct nfp_eth_table *eth_table) @@ -655,9 +646,6 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) nfp_net_pf_free_vnic(pf, nn); } - if (list_empty(&pf->vnics)) - nfp_net_pci_remove_finish(pf); - return 0; } @@ -707,6 +695,7 @@ int nfp_net_refresh_eth_port(struct nfp_port *port) */ int nfp_net_pci_probe(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); struct nfp_net_fw_version fw_ver; u8 __iomem *ctrl_bar, *qc_bar; int stride; @@ -720,16 +709,13 @@ int nfp_net_pci_probe(struct nfp_pf *pf) return -EINVAL; } - mutex_lock(&pf->lock); pf->max_data_vnics = nfp_net_pf_get_num_ports(pf); - if ((int)pf->max_data_vnics < 0) { - err = pf->max_data_vnics; - goto err_unlock; - } + if ((int)pf->max_data_vnics < 0) + return pf->max_data_vnics; err = nfp_net_pci_map_mem(pf); if (err) - goto err_unlock; + return err; ctrl_bar = nfp_cpp_area_iomem(pf->data_vnic_bar); qc_bar = nfp_cpp_area_iomem(pf->qc_area); @@ -768,6 +754,11 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_unmap; + err = devlink_register(devlink, &pf->pdev->dev); + if (err) + goto err_app_clean; + + mutex_lock(&pf->lock); pf->ddir = nfp_net_debugfs_device_add(pf->pdev); /* Allocate the vnics and do basic init */ @@ -799,32 +790,39 @@ err_free_vnics: nfp_net_pf_free_vnics(pf); err_clean_ddir: nfp_net_debugfs_dir_clean(&pf->ddir); + mutex_unlock(&pf->lock); + cancel_work_sync(&pf->port_refresh_work); + devlink_unregister(devlink); +err_app_clean: nfp_net_pf_app_clean(pf); err_unmap: nfp_net_pci_unmap_mem(pf); -err_unlock: - mutex_unlock(&pf->lock); - cancel_work_sync(&pf->port_refresh_work); return err; } void nfp_net_pci_remove(struct nfp_pf *pf) { - struct nfp_net *nn; + struct nfp_net *nn, *next; mutex_lock(&pf->lock); - if (list_empty(&pf->vnics)) - goto out; - - list_for_each_entry(nn, &pf->vnics, vnic_list) - if (nfp_net_is_data_vnic(nn)) - nfp_net_pf_clean_vnic(pf, nn); + list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { + if (!nfp_net_is_data_vnic(nn)) + continue; + nfp_net_pf_clean_vnic(pf, nn); + nfp_net_pf_free_vnic(pf, nn); + } - nfp_net_pf_free_vnics(pf); + nfp_net_pf_app_stop(pf); + /* stop app first, to avoid double free of ctrl vNIC's ddir */ + nfp_net_debugfs_dir_clean(&pf->ddir); - nfp_net_pci_remove_finish(pf); -out: mutex_unlock(&pf->lock); + devlink_unregister(priv_to_devlink(pf)); + + nfp_net_pf_free_irqs(pf); + nfp_net_pf_app_clean(pf); + nfp_net_pci_unmap_mem(pf); + cancel_work_sync(&pf->port_refresh_work); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 317f87cc3cc6..f67da6bde9da 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -46,6 +46,13 @@ #include "nfp_net_sriov.h" #include "nfp_port.h" +struct net_device * +nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, unsigned int id) +{ + return rcu_dereference_protected(set->reprs[id], + lockdep_is_held(&app->pf->lock)); +} + static void nfp_repr_inc_tx_stats(struct net_device *netdev, unsigned int len, int tx_status) @@ -369,21 +376,24 @@ static void nfp_repr_clean_and_free(struct nfp_repr *repr) 
nfp_repr_free(repr); } -void nfp_reprs_clean_and_free(struct nfp_reprs *reprs) +void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs) { + struct net_device *netdev; unsigned int i; - for (i = 0; i < reprs->num_reprs; i++) - if (reprs->reprs[i]) - nfp_repr_clean_and_free(netdev_priv(reprs->reprs[i])); + for (i = 0; i < reprs->num_reprs; i++) { + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) + nfp_repr_clean_and_free(netdev_priv(netdev)); + } kfree(reprs); } void -nfp_reprs_clean_and_free_by_type(struct nfp_app *app, - enum nfp_repr_type type) +nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type) { + struct net_device *netdev; struct nfp_reprs *reprs; int i; @@ -395,14 +405,16 @@ nfp_reprs_clean_and_free_by_type(struct nfp_app *app, /* Preclean must happen before we remove the reprs reference from the * app below. */ - for (i = 0; i < reprs->num_reprs; i++) - if (reprs->reprs[i]) - nfp_app_repr_preclean(app, reprs->reprs[i]); + for (i = 0; i < reprs->num_reprs; i++) { + netdev = nfp_repr_get_locked(app, reprs, i); + if (netdev) + nfp_app_repr_preclean(app, netdev); + } reprs = nfp_app_reprs_set(app, type, NULL); synchronize_rcu(); - nfp_reprs_clean_and_free(reprs); + nfp_reprs_clean_and_free(app, reprs); } struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs) @@ -420,48 +432,29 @@ struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs) int nfp_reprs_resync_phys_ports(struct nfp_app *app) { - struct nfp_reprs *reprs, *old_reprs; + struct net_device *netdev; + struct nfp_reprs *reprs; struct nfp_repr *repr; int i; - old_reprs = - rcu_dereference_protected(app->reprs[NFP_REPR_TYPE_PHYS_PORT], - lockdep_is_held(&app->pf->lock)); - if (!old_reprs) - return 0; - - reprs = nfp_reprs_alloc(old_reprs->num_reprs); + reprs = nfp_reprs_get_locked(app, NFP_REPR_TYPE_PHYS_PORT); if (!reprs) - return -ENOMEM; - - for (i = 0; i < old_reprs->num_reprs; i++) { - if (!old_reprs->reprs[i]) - continue; - - repr = netdev_priv(old_reprs->reprs[i]); - if (repr->port->type == NFP_PORT_INVALID) { - nfp_app_repr_preclean(app, old_reprs->reprs[i]); - continue; - } - - reprs->reprs[i] = old_reprs->reprs[i]; - } - - old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); - synchronize_rcu(); + return 0; - /* Now we free up removed representors */ - for (i = 0; i < old_reprs->num_reprs; i++) { - if (!old_reprs->reprs[i]) + for (i = 0; i < reprs->num_reprs; i++) { + netdev = nfp_repr_get_locked(app, reprs, i); + if (!netdev) continue; - repr = netdev_priv(old_reprs->reprs[i]); + repr = netdev_priv(netdev); if (repr->port->type != NFP_PORT_INVALID) continue; + nfp_app_repr_preclean(app, netdev); + rcu_assign_pointer(reprs->reprs[i], NULL); + synchronize_rcu(); nfp_repr_clean(repr); } - kfree(old_reprs); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index cbc7badf40a0..a621e8ff528e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -35,6 +35,7 @@ #define NFP_NET_REPR_H struct metadata_dst; +struct nfp_app; struct nfp_net; struct nfp_port; @@ -47,7 +48,7 @@ struct nfp_port; */ struct nfp_reprs { unsigned int num_reprs; - struct net_device *reprs[0]; + struct net_device __rcu *reprs[0]; }; /** @@ -114,16 +115,18 @@ static inline int nfp_repr_get_port_id(struct net_device *netdev) return priv->dst->u.port_info.port_id; } +struct net_device * +nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs 
*set, + unsigned int id); + void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len); int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, u32 cmsg_port_id, struct nfp_port *port, struct net_device *pf_netdev); struct net_device *nfp_repr_alloc(struct nfp_app *app); -void -nfp_reprs_clean_and_free(struct nfp_reprs *reprs); -void -nfp_reprs_clean_and_free_by_type(struct nfp_app *app, - enum nfp_repr_type type); +void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs); +void nfp_reprs_clean_and_free_by_type(struct nfp_app *app, + enum nfp_repr_type type); struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs); int nfp_reprs_resync_phys_ports(struct nfp_app *app); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index c879626e035b..b802a1d55449 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -277,12 +277,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, } nfp_net_irqs_assign(nn, vf->irq_entries, num_irqs); - /* Get ME clock frequency from ctrl BAR - * XXX for now frequency is hardcoded until we figure out how - * to get the value from nfp-hwinfo into ctrl bar - */ - nn->me_freq_mhz = 1200; - err = nfp_net_init(nn); if (err) goto err_irqs_disable; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 28262470dabf..ef30597aa319 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -674,18 +674,20 @@ void __iomem *nfp_cpp_area_iomem(struct nfp_cpp_area *area) * @offset: Offset into area * @value: Pointer to read buffer * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_area_readl(struct nfp_cpp_area *area, unsigned long offset, u32 *value) { u8 tmp[4]; - int err; + int n; - err = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); - *value = get_unaligned_le32(tmp); + n = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? n : -EIO; - return err; + *value = get_unaligned_le32(tmp); + return 0; } /** @@ -694,16 +696,18 @@ int nfp_cpp_area_readl(struct nfp_cpp_area *area, * @offset: Offset into area * @value: Value to write * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_area_writel(struct nfp_cpp_area *area, unsigned long offset, u32 value) { u8 tmp[4]; + int n; put_unaligned_le32(value, tmp); + n = nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp)); - return nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp)); + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; } /** @@ -712,18 +716,20 @@ int nfp_cpp_area_writel(struct nfp_cpp_area *area, * @offset: Offset into area * @value: Pointer to read buffer * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_area_readq(struct nfp_cpp_area *area, unsigned long offset, u64 *value) { u8 tmp[8]; - int err; + int n; - err = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); - *value = get_unaligned_le64(tmp); + n = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? 
n : -EIO; - return err; + *value = get_unaligned_le64(tmp); + return 0; } /** @@ -732,16 +738,18 @@ int nfp_cpp_area_readq(struct nfp_cpp_area *area, * @offset: Offset into area * @value: Value to write * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_area_writeq(struct nfp_cpp_area *area, unsigned long offset, u64 value) { u8 tmp[8]; + int n; put_unaligned_le64(value, tmp); + n = nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp)); - return nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp)); + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; } /** @@ -1080,7 +1088,7 @@ static u32 nfp_xpb_to_cpp(struct nfp_cpp *cpp, u32 *xpb_addr) * @xpb_addr: Address for operation * @value: Pointer to read buffer * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_xpb_readl(struct nfp_cpp *cpp, u32 xpb_addr, u32 *value) { @@ -1095,7 +1103,7 @@ int nfp_xpb_readl(struct nfp_cpp *cpp, u32 xpb_addr, u32 *value) * @xpb_addr: Address for operation * @value: Value to write * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_xpb_writel(struct nfp_cpp *cpp, u32 xpb_addr, u32 value) { @@ -1113,7 +1121,7 @@ int nfp_xpb_writel(struct nfp_cpp *cpp, u32 xpb_addr, u32 value) * * KERNEL: This operation is safe to call in interrupt or softirq context. * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_xpb_writelm(struct nfp_cpp *cpp, u32 xpb_tgt, u32 mask, u32 value) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c index ab86bceb93f2..20bad05e2e92 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c @@ -64,18 +64,20 @@ * @address: Address for operation * @value: Pointer to read buffer * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_readl(struct nfp_cpp *cpp, u32 cpp_id, unsigned long long address, u32 *value) { u8 tmp[4]; - int err; + int n; - err = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp)); - *value = get_unaligned_le32(tmp); + n = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? n : -EIO; - return err; + *value = get_unaligned_le32(tmp); + return 0; } /** @@ -85,15 +87,18 @@ int nfp_cpp_readl(struct nfp_cpp *cpp, u32 cpp_id, * @address: Address for operation * @value: Value to write * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_writel(struct nfp_cpp *cpp, u32 cpp_id, unsigned long long address, u32 value) { u8 tmp[4]; + int n; put_unaligned_le32(value, tmp); - return nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp)); + n = nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp)); + + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; } /** @@ -103,18 +108,20 @@ int nfp_cpp_writel(struct nfp_cpp *cpp, u32 cpp_id, * @address: Address for operation * @value: Pointer to read buffer * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id, unsigned long long address, u64 *value) { u8 tmp[8]; - int err; + int n; - err = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp)); - *value = get_unaligned_le64(tmp); + n = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp)); + if (n != sizeof(tmp)) + return n < 0 ? 
n : -EIO; - return err; + *value = get_unaligned_le64(tmp); + return 0; } /** @@ -124,15 +131,18 @@ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id, * @address: Address for operation * @value: Value to write * - * Return: length of the io, or -ERRNO + * Return: 0 on success, or -ERRNO */ int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id, unsigned long long address, u64 value) { u8 tmp[8]; + int n; put_unaligned_le64(value, tmp); - return nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp)); + n = nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp)); + + return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO; } /* NOTE: This code should not use nfp_xpb_* functions, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c index ecda474ac7c3..46107aefad1c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c @@ -277,10 +277,6 @@ u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, break; } - if (err == sym->size) - err = 0; - else if (err >= 0) - err = -EIO; exit: if (error) *error = err; diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 21e15cb2f62e..66c665d0b926 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -795,7 +795,7 @@ struct fe_priv { */ union ring_type get_rx, put_rx, last_rx; struct nv_skb_map *get_rx_ctx, *put_rx_ctx; - struct nv_skb_map *first_rx_ctx, *last_rx_ctx; + struct nv_skb_map *last_rx_ctx; struct nv_skb_map *rx_skb; union ring_type rx_ring; @@ -1835,7 +1835,7 @@ static int nv_alloc_rx(struct net_device *dev) if (unlikely(np->put_rx.orig++ == np->last_rx.orig)) np->put_rx.orig = np->rx_ring.orig; if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx)) - np->put_rx_ctx = np->first_rx_ctx; + np->put_rx_ctx = np->rx_skb; } else { packet_dropped: u64_stats_update_begin(&np->swstats_rx_syncp); @@ -1877,7 +1877,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev) if (unlikely(np->put_rx.ex++ == np->last_rx.ex)) np->put_rx.ex = np->rx_ring.ex; if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx)) - np->put_rx_ctx = np->first_rx_ctx; + np->put_rx_ctx = np->rx_skb; } else { packet_dropped: u64_stats_update_begin(&np->swstats_rx_syncp); @@ -1910,7 +1910,8 @@ static void nv_init_rx(struct net_device *dev) np->last_rx.orig = &np->rx_ring.orig[np->rx_ring_size-1]; else np->last_rx.ex = &np->rx_ring.ex[np->rx_ring_size-1]; - np->get_rx_ctx = np->put_rx_ctx = np->first_rx_ctx = np->rx_skb; + np->get_rx_ctx = np->rx_skb; + np->put_rx_ctx = np->rx_skb; np->last_rx_ctx = &np->rx_skb[np->rx_ring_size-1]; for (i = 0; i < np->rx_ring_size; i++) { @@ -2914,7 +2915,7 @@ next_pkt: if (unlikely(np->get_rx.orig++ == np->last_rx.orig)) np->get_rx.orig = np->rx_ring.orig; if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx)) - np->get_rx_ctx = np->first_rx_ctx; + np->get_rx_ctx = np->rx_skb; rx_work++; } @@ -3003,7 +3004,7 @@ next_pkt: if (unlikely(np->get_rx.ex++ == np->last_rx.ex)) np->get_rx.ex = np->rx_ring.ex; if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx)) - np->get_rx_ctx = np->first_rx_ctx; + np->get_rx_ctx = np->rx_skb; rx_work++; } @@ -5510,11 +5511,9 @@ static int nv_open(struct net_device *dev) /* One manual link speed update: Interrupts are enabled, future link * speed changes cause interrupts and are handled by nv_link_irq(). 
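 * The status register is read once (result unused) and all MII status
 * bits are then cleared by writing NVREG_MIISTAT_MASK_ALL.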
*/ - { - u32 miistat; - miistat = readl(base + NvRegMIIStatus); - writel(NVREG_MIISTAT_MASK_ALL, base + NvRegMIIStatus); - } + readl(base + NvRegMIIStatus); + writel(NVREG_MIISTAT_MASK_ALL, base + NvRegMIIStatus); + /* set linkspeed to invalid value, thus forcing nv_update_linkspeed * to init hw */ np->linkspeed = 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index bdc46f11ce45..f6bf54614d7d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -358,10 +358,27 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) kfree(p_rdma_info); } +static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static void qed_rdma_free_reserved_lkey(struct qed_hwfn *p_hwfn) +{ + qed_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey); +} + static void qed_rdma_free(struct qed_hwfn *p_hwfn) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); + qed_rdma_free_reserved_lkey(p_hwfn); qed_rdma_resc_free(p_hwfn); } @@ -615,9 +632,6 @@ static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) { struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; - /* The first DPI is reserved for the Kernel */ - __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap); - /* Tid 0 will be used as the key for "reserved MR". * The driver should allocate memory for it so it can be loaded but no * ramrod should be passed on it. @@ -797,17 +811,6 @@ static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) return p_hwfn->p_rdma_info->dev; } -static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) -{ - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); - - spin_lock_bh(&p_hwfn->p_rdma_info->lock); - qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); - spin_unlock_bh(&p_hwfn->p_rdma_info->lock); -} - static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) { struct qed_hwfn *p_hwfn; diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h index 5028fb4bec2b..4beedb8faa1e 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.h +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h @@ -114,8 +114,9 @@ struct emac_tpd { #define TPD_INSTC_SET(tpd, val) BITS_SET((tpd)->word[3], 17, 17, val) /* High-14bit Buffer Address, so the 64-bit address is * {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L} + * Extend TPD_BUFFER_ADDR_H to [31:18], because we never enable timestamping. */ -#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 30, val) +#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 31, val) /* Format D. Word offset from the 1st byte of this packet to start to calculate * the custom checksum. */ diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 38c924bdd32e..13235baf4766 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -615,8 +615,11 @@ static int emac_probe(struct platform_device *pdev) u32 reg; int ret; - /* The TPD buffer address is limited to 45 bits. 
*/ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45)); + /* The TPD buffer address is limited to: + * 1. PTP: 45 bits. (The driver doesn't support PTP yet.) + * 2. NON-PTP: 46 bits. + */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(46)); if (ret) { dev_err(&pdev->dev, "could not set DMA mask\n"); return ret; } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 8ae467db9162..75fbf58e421c 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -322,6 +322,25 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) return 0; } +static void efx_ef10_read_licensed_features(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_LICENSING_V3_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_LICENSING_V3_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, LICENSING_V3_IN_OP, + MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_LICENSING_V3, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc || (outlen < MC_CMD_LICENSING_V3_OUT_LEN)) + return; + + nic_data->licensed_features = MCDI_QWORD(outbuf, + LICENSING_V3_OUT_LICENSED_FEATURES); +} + static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN); @@ -722,6 +741,8 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc < 0) goto fail5; + efx_ef10_read_licensed_features(efx); + /* We can have one VI for each vi_stride-byte region. * However, until we use TX option descriptors we need two TX queues * per channel. @@ -760,14 +781,7 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc && rc != -EPERM) goto fail5; - rc = efx_ptp_probe(efx, NULL); - /* Failure to probe PTP is not fatal. - * In the case of EPERM, efx_ptp_probe will print its own message (in - * efx_ptp_get_attributes()), so we don't need to. - */ - if (rc && rc != -EPERM) - netif_warn(efx, drv, efx->net_dev, - "Failed to probe PTP, rc=%d\n", rc); + efx_ptp_defer_probe_with_channel(efx); #ifdef CONFIG_SFC_SRIOV if ((efx->pci_dev->physfn) && (!efx->pci_dev->is_physfn)) { @@ -937,6 +951,11 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) /* Link a buffer to each TX queue */ efx_for_each_channel(channel, efx) { + /* Extra channels, even those with TXQs (PTP), do not require + * PIO resources. + */ + if (!channel->type->want_pio) + continue; efx_for_each_channel_tx_queue(tx_queue, channel) { /* We assign the PIO buffers to queues in * reverse order to allow for the following @@ -1284,7 +1303,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) void __iomem *membase; int rc; - channel_vis = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + channel_vis = max(efx->n_channels, + (efx->n_tx_channels + efx->n_extra_tx_channels) * + EFX_TXQ_TYPES); #ifdef EFX_USE_PIO /* Try to allocate PIO buffers if wanted and if the full @@ -2408,12 +2429,25 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) int i; BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + /* Only attempt to enable TX timestamping if we have the license for it, + * otherwise TXQ init will fail. + */ + if (!(nic_data->licensed_features & + (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) { + tx_queue->timestamping = false; + /* Disable sync events on this channel. */ + if (efx->type->ptp_set_ts_sync_events) + efx->type->ptp_set_ts_sync_events(efx, false, false); + } + /* TSOv2 is a limited resource that can only be configured on a limited * number of queues. 
TSO without checksum offload is not really a thing, * so we only enable it for those queues. + * TSOv2 cannot be used with hardware timestamping. */ if (csum_offload && (nic_data->datapath_caps2 & - (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) { + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) && + !tx_queue->timestamping) { tso_v2 = true; netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", channel->channel); @@ -2439,14 +2473,16 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); do { - MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS, + MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, /* This flag was removed from mcdi_pcol.h for * the non-_EXT version of INIT_TXQ. However, * firmware still honours it. */ INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, - INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, + INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, + tx_queue->timestamping); rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, NULL, 0, NULL); @@ -2472,12 +2508,13 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION; tx_queue->insert_count = 1; txd = efx_tx_desc(tx_queue, 0); - EFX_POPULATE_QWORD_4(*txd, + EFX_POPULATE_QWORD_5(*txd, ESF_DZ_TX_DESC_IS_OPT, true, ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_CRC_CSUM, ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, - ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); + ESF_DZ_TX_OPTION_IP_CSUM, csum_offload, + ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping); tx_queue->write_count = 1; if (tso_v2) { @@ -3572,31 +3609,92 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel, return n_packets; } -static int +static u32 efx_ef10_extract_event_ts(efx_qword_t *event) +{ + u32 tstamp; + + tstamp = EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_HI); + tstamp <<= 16; + tstamp |= EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_LO); + + return tstamp; +} + +static void efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) { struct efx_nic *efx = channel->efx; struct efx_tx_queue *tx_queue; unsigned int tx_ev_desc_ptr; unsigned int tx_ev_q_label; - int tx_descs = 0; + unsigned int tx_ev_type; + u64 ts_part; if (unlikely(READ_ONCE(efx->reset_pending))) - return 0; + return; if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) - return 0; + return; - /* Transmit completion */ - tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); + /* Get the transmit queue */ tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); tx_queue = efx_channel_get_tx_queue(channel, tx_ev_q_label % EFX_TXQ_TYPES); - tx_descs = ((tx_ev_desc_ptr + 1 - tx_queue->read_count) & - tx_queue->ptr_mask); - efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); - return tx_descs; + if (!tx_queue->timestamping) { + /* Transmit completion */ + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); + efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); + return; + } + + /* Transmit timestamps are only available for the 8XXX series. They + * result in three events per packet. These occur in order, and are: + * - the normal completion event + * - the low part of the timestamp + * - the high part of the timestamp + * + * Each part of the timestamp is itself split across two 16-bit + * fields in the event. 
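+ * (efx_ef10_extract_event_ts() above reassembles one such 32-bit half + * from the _HI and _LO fields; the minor and major halves are stashed + * on the TX queue and combined by efx_ptp_nic_to_kernel_time() when + * the completed buffers are dequeued.)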
+ */ + tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1); + + switch (tx_ev_type) { + case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION: + /* In case of Queue flush or FLR, we might have received + * the previous TX completion event but not the Timestamp + * events. + */ + if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask) + efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr); + + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, + ESF_DZ_TX_DESCR_INDX); + tx_queue->completed_desc_ptr = + tx_ev_desc_ptr & tx_queue->ptr_mask; + break; + + case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO: + ts_part = efx_ef10_extract_event_ts(event); + tx_queue->completed_timestamp_minor = ts_part; + break; + + case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI: + ts_part = efx_ef10_extract_event_ts(event); + tx_queue->completed_timestamp_major = ts_part; + + efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr); + tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + break; + + default: + netif_err(efx, hw, efx->net_dev, + "channel %d unknown tx event type %d (data " + EFX_QWORD_FMT ")\n", + channel->channel, tx_ev_type, + EFX_QWORD_VAL(*event)); + break; + } } static void @@ -3658,7 +3756,6 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota) efx_qword_t event, *p_event; unsigned int read_ptr; int ev_code; - int tx_descs = 0; int spent = 0; if (quota <= 0) @@ -3698,13 +3795,7 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota) } break; case ESE_DZ_EV_CODE_TX_EV: - tx_descs += efx_ef10_handle_tx_event(channel, &event); - if (tx_descs > efx->txq_entries) { - spent = quota; - goto out; - } else if (++spent == quota) { - goto out; - } + efx_ef10_handle_tx_event(channel, &event); break; case ESE_DZ_EV_CODE_DRIVER_EV: efx_ef10_handle_driver_event(channel, &event); @@ -6179,7 +6270,8 @@ static int efx_ef10_ptp_set_ts_sync_events(struct efx_nic *efx, bool en, efx_ef10_rx_enable_timestamping : efx_ef10_rx_disable_timestamping; - efx_for_each_channel(channel, efx) { + channel = efx_ptp_channel(efx); + if (channel) { int rc = set(channel, temp); if (en && rc != 0) { efx_ef10_ptp_set_ts_sync_events(efx, false, temp); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 12f0abc30cb1..456866b05641 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -896,12 +896,20 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); } +bool efx_default_channel_want_txqs(struct efx_channel *channel) +{ + return channel->channel - channel->efx->tx_channel_offset < + channel->efx->n_tx_channels; +} + static const struct efx_channel_type efx_default_channel_type = { .pre_probe = efx_channel_dummy_op_int, .post_remove = efx_channel_dummy_op_void, .get_name = efx_get_channel_name, .copy = efx_copy_channel, + .want_txqs = efx_default_channel_want_txqs, .keep_eventq = false, + .want_pio = true, }; int efx_channel_dummy_op_int(struct efx_channel *channel) @@ -1501,6 +1509,7 @@ static int efx_probe_interrupts(struct efx_nic *efx) } /* Assign extra channels if possible */ + efx->n_extra_tx_channels = 0; j = efx->n_channels; for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { if (!efx->extra_channel_type[i]) @@ -1512,6 +1521,8 @@ static int efx_probe_interrupts(struct efx_nic *efx) --j; efx_get_channel(efx, j)->type = efx->extra_channel_type[i]; + if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) + efx->n_extra_tx_channels++; } } diff --git a/drivers/net/ethernet/sfc/farch.c 
b/drivers/net/ethernet/sfc/farch.c index 5334dc83d926..266b9bee1f3a 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -818,17 +818,16 @@ static void efx_farch_magic_event(struct efx_channel *channel, u32 magic) * The NIC batches TX completion events; the message we receive is of * the form "complete all TX events up to this index". */ -static int +static void efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) { unsigned int tx_ev_desc_ptr; unsigned int tx_ev_q_label; struct efx_tx_queue *tx_queue; struct efx_nic *efx = channel->efx; - int tx_packets = 0; if (unlikely(READ_ONCE(efx->reset_pending))) - return 0; + return; if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { /* Transmit completion */ @@ -836,8 +835,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); tx_queue = efx_channel_get_tx_queue( channel, tx_ev_q_label % EFX_TXQ_TYPES); - tx_packets = ((tx_ev_desc_ptr - tx_queue->read_count) & - tx_queue->ptr_mask); efx_xmit_done(tx_queue, tx_ev_desc_ptr); } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) { /* Rewrite the FIFO write pointer */ @@ -856,8 +853,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) EFX_QWORD_FMT"\n", channel->channel, EFX_QWORD_VAL(*event)); } - - return tx_packets; } /* Detect errors included in the rx_evt_pkt_ok bit. */ @@ -1090,7 +1085,7 @@ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) int qid; qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); - if (qid < EFX_TXQ_TYPES * efx->n_tx_channels) { + if (qid < EFX_TXQ_TYPES * (efx->n_tx_channels + efx->n_extra_tx_channels)) { tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES, qid % EFX_TXQ_TYPES); if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) { @@ -1270,7 +1265,6 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget) unsigned int read_ptr; efx_qword_t event, *p_event; int ev_code; - int tx_packets = 0; int spent = 0; if (budget <= 0) @@ -1304,12 +1298,7 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget) goto out; break; case FSE_AZ_EV_CODE_TX_EV: - tx_packets += efx_farch_handle_tx_event(channel, - &event); - if (tx_packets > efx->txq_entries) { - spent = budget; - goto out; - } + efx_farch_handle_tx_event(channel, &event); break; case FSE_AZ_EV_CODE_DRV_GEN_EV: efx_farch_handle_generated_event(channel, &event); @@ -1680,20 +1669,21 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx) */ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) { - unsigned vi_count, buftbl_min; + unsigned vi_count, buftbl_min, total_tx_channels; #ifdef CONFIG_SFC_SRIOV struct siena_nic_data *nic_data = efx->nic_data; #endif + total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels; /* Account for the buffer table entries backing the datapath channels * and the descriptor caches for those channels. 
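+ * Extra channels with TX queues (currently only the PTP channel) are + * counted via total_tx_channels so their rings are backed as well.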
*/ buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE + - efx->n_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE + + total_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE + efx->n_channels * EFX_MAX_EVQ_SIZE) * sizeof(efx_qword_t) / EFX_BUF_SIZE); - vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + vi_count = max(efx->n_channels, total_tx_channels * EFX_TXQ_TYPES); #ifdef CONFIG_SFC_SRIOV if (efx->type->sriov_wanted) { diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 3dd42f3136fe..d20a8660ee48 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -191,6 +191,7 @@ struct efx_tx_buffer { * Size of the region is efx_piobuf_size. * @piobuf_offset: Buffer offset to be specified in PIO descriptors * @initialised: Has hardware queue been initialised? + * @timestamping: Is timestamping enabled for this channel? * @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and * may also map tx data, depending on the nature of the TSO implementation. * @read_count: Current read pointer. @@ -202,6 +203,10 @@ struct efx_tx_buffer { * avoid cache-line ping-pong between the xmit path and the * completion path. * @merge_events: Number of TX merged completion events + * @completed_desc_ptr: Most recent completed pointer - only used with + * timestamping. + * @completed_timestamp_major: Top part of the most recent tx timestamp. + * @completed_timestamp_minor: Low part of the most recent tx timestamp. * @insert_count: Current insert pointer * This is the number of buffers that have been added to the * software ring. @@ -247,6 +252,7 @@ struct efx_tx_queue { void __iomem *piobuf; unsigned int piobuf_offset; bool initialised; + bool timestamping; /* Function pointers used in the fast path. */ int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); @@ -257,6 +263,9 @@ struct efx_tx_queue { unsigned int merge_events; unsigned int bytes_compl; unsigned int pkts_compl; + unsigned int completed_desc_ptr; + u32 completed_timestamp_major; + u32 completed_timestamp_minor; /* Members used only on the xmit path */ unsigned int insert_count ____cacheline_aligned_in_smp; @@ -522,8 +531,12 @@ struct efx_msi_context { * @copy: Copy the channel state prior to reallocation. May be %NULL if * reallocation is not supported. * @receive_skb: Handle an skb ready to be passed to netif_receive_skb() + * @want_txqs: Determine whether this channel should have TX queues + * created. If %NULL, TX queues are not created. * @keep_eventq: Flag for whether event queue should be kept initialised * while the device is stopped + * @want_pio: Flag for whether PIO buffers should be linked to this + * channel's TX queues. 
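+ * Only the default channel type sets this; extra channels, even those + * with TXQs (PTP), do not require PIO resources.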
*/ struct efx_channel_type { void (*handle_no_channel)(struct efx_nic *); @@ -532,7 +545,9 @@ struct efx_channel_type { void (*get_name)(struct efx_channel *, char *buf, size_t len); struct efx_channel *(*copy)(const struct efx_channel *); bool (*receive_skb)(struct efx_channel *, struct sk_buff *); + bool (*want_txqs)(struct efx_channel *); bool keep_eventq; + bool want_pio; }; enum efx_led_mode { @@ -735,6 +750,7 @@ struct vfdi_status; * @n_channels: Number of channels in use * @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_tx_channels: Number of channels used for TX + * @n_extra_tx_channels: Number of extra channels with TX queues * @rx_ip_align: RX DMA address offset to have IP header aligned in * in accordance with NET_IP_ALIGN * @rx_dma_len: Current maximum RX DMA length @@ -881,6 +897,7 @@ struct efx_nic { unsigned rss_spread; unsigned tx_channel_offset; unsigned n_tx_channels; + unsigned n_extra_tx_channels; unsigned int rx_ip_align; unsigned int rx_dma_len; unsigned int rx_buffer_order; @@ -1363,8 +1380,8 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return channel->channel - channel->efx->tx_channel_offset < - channel->efx->n_tx_channels; + return channel->type && channel->type->want_txqs && + channel->type->want_txqs(channel); } static inline struct efx_tx_queue * diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 763052214525..6549fc685a48 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -440,6 +440,7 @@ struct efx_ef10_nic_data { struct efx_udp_tunnel udp_tunnels[16]; bool udp_tunnels_dirty; struct mutex udp_tunnels_lock; + u64 licensed_features; }; int efx_init_sriov(void); @@ -448,6 +449,7 @@ void efx_fini_sriov(void); struct ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); +struct efx_channel *efx_ptp_channel(struct efx_nic *efx); void efx_ptp_remove(struct efx_nic *efx); int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr); int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr); @@ -471,6 +473,8 @@ static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel, } void efx_ptp_start_datapath(struct efx_nic *efx); void efx_ptp_stop_datapath(struct efx_nic *efx); +bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx); +ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue); extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 3b37d7ded3c4..433d29d6bc95 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -149,18 +149,14 @@ enum ptp_packet_state { /* Maximum parts-per-billion adjustment that is acceptable */ #define MAX_PPB 1000000 -/* Number of bits required to hold the above */ -#define MAX_PPB_BITS 20 - -/* Number of extra bits allowed when calculating fractional ns. - * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should - * be less than 63. - */ -#define PPB_EXTRA_BITS 2 - /* Precalculate scale word to avoid long long division at runtime */ -#define PPB_SCALE_WORD ((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\ - MAX_PPB_BITS)) / 1000000000LL) +/* This is equivalent to 2^66 / 10^9. 
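+ * (10^9 = 2^9 * 5^9 and 5^9 = 1953125, so 2^66 / 10^9 reduces to + * 2^57 / 1953125.)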
*/ +#define PPB_SCALE_WORD ((1LL << (57)) / 1953125LL) + +/* How much to shift down after scaling to convert to FP40 */ +#define PPB_SHIFT_FP40 26 +/* ... and FP44. */ +#define PPB_SHIFT_FP44 22 #define PTP_SYNC_ATTEMPTS 4 @@ -218,8 +214,8 @@ struct efx_ptp_timeset { * @channel: The PTP channel (Siena only) * @rx_ts_inline: Flag for whether RX timestamps are inline (else they are * separate events) - * @rxq: Receive queue (awaiting timestamps) - * @txq: Transmit queue + * @rxq: Receive SKB queue (awaiting timestamps) + * @txq: Transmit SKB queue * @evt_list: List of MC receive events awaiting packets * @evt_free_list: List of free events * @evt_lock: Lock for manipulating evt_list and evt_free_list @@ -233,19 +229,36 @@ struct efx_ptp_timeset { * @config: Current timestamp configuration * @enabled: PTP operation enabled * @mode: Mode in which PTP operating (PTP version) - * @time_format: Time format supported by this NIC * @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time * @nic_to_kernel_time: Function to convert from NIC to kernel time + * @nic_time.minor_max: Wrap point for NIC minor times + * @nic_time.sync_event_diff_min: Minimum acceptable difference between time + * in packet prefix and last MCDI time sync event i.e. how much earlier than + * the last sync event time a packet timestamp can be. + * @nic_time.sync_event_diff_max: Maximum acceptable difference between time + * in packet prefix and last MCDI time sync event i.e. how much later than + * the last sync event time a packet timestamp can be. + * @nic_time.sync_event_minor_shift: Shift required to make minor time from + * field in MCDI time sync event. * @min_synchronisation_ns: Minimum acceptable corrected sync window - * @ts_corrections.tx: Required driver correction of transmit timestamps - * @ts_corrections.rx: Required driver correction of receive timestamps + * @capabilities: Capabilities flags from the NIC + * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit + * timestamps + * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive + * timestamps * @ts_corrections.pps_out: PPS output error (information only) * @ts_corrections.pps_in: Required driver correction of PPS input timestamps + * @ts_corrections.general_tx: Required driver correction of general packet + * transmit timestamps + * @ts_corrections.general_rx: Required driver correction of general packet + * receive timestamps * @evt_frags: Partly assembled PTP events * @evt_frag_idx: Current fragment number * @evt_code: Last event code * @start: Address at which MC indicates ready for synchronisation * @host_time_pps: Host time at last PPS + * @adjfreq_ppb_shift: Shift required to convert scaled parts-per-billion + * frequency adjustment into a fixed point fractional nanosecond format. * @current_adjfreq: Current ppb adjustment. * @phc_clock: Pointer to registered phc device (if primary function) * @phc_clock_info: Registration structure for phc device @@ -264,6 +277,7 @@ struct efx_ptp_timeset { * @oversize_sync_windows: Number of corrected sync windows that are too large * @rx_no_timestamp: Number of packets received without a timestamp. * @timeset: Last set of synchronisation statistics. + * @xmit_skb: Transmit SKB function. 
*/ struct efx_ptp_data { struct efx_nic *efx; @@ -284,22 +298,31 @@ struct efx_ptp_data { struct hwtstamp_config config; bool enabled; unsigned int mode; - unsigned int time_format; void (*ns_to_nic_time)(s64 ns, u32 *nic_major, u32 *nic_minor); ktime_t (*nic_to_kernel_time)(u32 nic_major, u32 nic_minor, s32 correction); + struct { + u32 minor_max; + u32 sync_event_diff_min; + u32 sync_event_diff_max; + unsigned int sync_event_minor_shift; + } nic_time; unsigned int min_synchronisation_ns; + unsigned int capabilities; struct { - s32 tx; - s32 rx; + s32 ptp_tx; + s32 ptp_rx; s32 pps_out; s32 pps_in; + s32 general_tx; + s32 general_rx; } ts_corrections; efx_qword_t evt_frags[MAX_EVENT_FRAGS]; int evt_frag_idx; int evt_code; struct efx_buffer start; struct pps_event_time host_time_pps; + unsigned int adjfreq_ppb_shift; s64 current_adjfreq; struct ptp_clock *phc_clock; struct ptp_clock_info phc_clock_info; @@ -319,6 +342,7 @@ struct efx_ptp_data { unsigned int rx_no_timestamp; struct efx_ptp_timeset timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM]; + void (*xmit_skb)(struct efx_nic *efx, struct sk_buff *skb); }; static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta); @@ -329,6 +353,24 @@ static int efx_phc_settime(struct ptp_clock_info *ptp, static int efx_phc_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); +bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + return ((efx_nic_rev(efx) >= EFX_REV_HUNT_A0) && + (nic_data->datapath_caps2 & + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN) + )); +} + +/* PTP 'extra' channel is still a traffic channel, but we only create TX queues + * if PTP uses MAC TX timestamps, not if PTP uses the MC directly to transmit. + */ +bool efx_ptp_want_txqs(struct efx_channel *channel) +{ + return efx_ptp_use_mac_tx_timestamps(channel->efx); +} + #define PTP_SW_STAT(ext_name, field_name) \ { #ext_name, 0, offsetof(struct efx_ptp_data, field_name) } #define PTP_MC_STAT(ext_name, mcdi_name) \ @@ -471,6 +513,89 @@ static ktime_t efx_ptp_s27_to_ktime_correction(u32 nic_major, u32 nic_minor, return efx_ptp_s27_to_ktime(nic_major, nic_minor); } +/* For Medford2 platforms the time is in seconds and quarter nanoseconds. */ +static void efx_ptp_ns_to_s_qns(s64 ns, u32 *nic_major, u32 *nic_minor) +{ + struct timespec64 ts = ns_to_timespec64(ns); + + *nic_major = (u32)ts.tv_sec; + *nic_minor = ts.tv_nsec * 4; +} + +static ktime_t efx_ptp_s_qns_to_ktime_correction(u32 nic_major, u32 nic_minor, + s32 correction) +{ + ktime_t kt; + + nic_minor = DIV_ROUND_CLOSEST(nic_minor, 4); + correction = DIV_ROUND_CLOSEST(correction, 4); + + kt = ktime_set(nic_major, nic_minor); + + if (correction >= 0) + kt = ktime_add_ns(kt, (u64)correction); + else + kt = ktime_sub_ns(kt, (u64)-correction); + return kt; +} + +struct efx_channel *efx_ptp_channel(struct efx_nic *efx) +{ + return efx->ptp_data ? efx->ptp_data->channel : NULL; +} + +static u32 last_sync_timestamp_major(struct efx_nic *efx) +{ + struct efx_channel *channel = efx_ptp_channel(efx); + u32 major = 0; + + if (channel) + major = channel->sync_timestamp_major; + return major; +} + +/* The 8000 series and later can provide the time from the MAC, which is only + * 48 bits long and provides meta-information in the top 2 bits. 
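+ * Only the low 16 bits of the seconds count arrive in the major value; + * the upper bits are taken from the most recent MCDI time sync event. + * Sync events arrive every 0.25s while 16 bits of seconds wrap only + * every ~18 hours, so those bits cannot be stale.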
+ */ +static ktime_t +efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx, + struct efx_ptp_data *ptp, + u32 nic_major, u32 nic_minor, + s32 correction) +{ + ktime_t kt = { 0 }; + + if (!(nic_major & 0x80000000)) { + WARN_ON_ONCE(nic_major >> 16); + /* Use the top bits from the latest sync event. */ + nic_major &= 0xffff; + nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000); + + kt = ptp->nic_to_kernel_time(nic_major, nic_minor, + correction); + } + return kt; +} + +ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + ktime_t kt; + + if (efx_ptp_use_mac_tx_timestamps(efx)) + kt = efx_ptp_mac_nic_to_ktime_correction(efx, ptp, + tx_queue->completed_timestamp_major, + tx_queue->completed_timestamp_minor, + ptp->ts_corrections.general_tx); + else + kt = ptp->nic_to_kernel_time( + tx_queue->completed_timestamp_major, + tx_queue->completed_timestamp_minor, + ptp->ts_corrections.general_tx); + return kt; +} + /* Get PTP attributes and set up time conversions */ static int efx_ptp_get_attributes(struct efx_nic *efx) { @@ -502,31 +627,71 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) return rc; } - if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION) { + switch (fmt) { + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION: ptp->ns_to_nic_time = efx_ptp_ns_to_s27; ptp->nic_to_kernel_time = efx_ptp_s27_to_ktime_correction; - } else if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS) { + ptp->nic_time.minor_max = 1 << 27; + ptp->nic_time.sync_event_minor_shift = 19; + break; + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS: ptp->ns_to_nic_time = efx_ptp_ns_to_s_ns; ptp->nic_to_kernel_time = efx_ptp_s_ns_to_ktime_correction; - } else { + ptp->nic_time.minor_max = 1000000000; + ptp->nic_time.sync_event_minor_shift = 22; + break; + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_QTR_NANOSECONDS: + ptp->ns_to_nic_time = efx_ptp_ns_to_s_qns; + ptp->nic_to_kernel_time = efx_ptp_s_qns_to_ktime_correction; + ptp->nic_time.minor_max = 4000000000; + ptp->nic_time.sync_event_minor_shift = 24; + break; + default: return -ERANGE; } - ptp->time_format = fmt; - - /* MC_CMD_PTP_OP_GET_ATTRIBUTES is an extended version of an older - * operation MC_CMD_PTP_OP_GET_TIME_FORMAT that also returns a value - * to use for the minimum acceptable corrected synchronization window. + /* Precalculate acceptable difference between the minor time in the + * packet prefix and the last MCDI time sync event. We expect the + * packet prefix timestamp to be after the sync event by up to one + * sync event interval (0.25s), but we allow it to exceed this by a + * fuzz factor (0.1s). + */ + ptp->nic_time.sync_event_diff_min = ptp->nic_time.minor_max + - (ptp->nic_time.minor_max / 10); + ptp->nic_time.sync_event_diff_max = (ptp->nic_time.minor_max / 4) + + (ptp->nic_time.minor_max / 10); + + /* MC_CMD_PTP_OP_GET_ATTRIBUTES has been extended twice from an older + * operation MC_CMD_PTP_OP_GET_TIME_FORMAT. The function may now return + * a value to use for the minimum acceptable corrected synchronization + * window and may return further capabilities. * If we have the extra information, store it. For older firmware that * does not implement the extended command, use the default value. 
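+ * (The first extension added SYNC_WINDOW_MIN; the second added the + * CAPABILITIES word, which carries the FP44_FREQ_ADJ flag used below.)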
*/ - if (rc == 0 && out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN) + if (rc == 0 && + out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST) ptp->min_synchronisation_ns = MCDI_DWORD(outbuf, PTP_OUT_GET_ATTRIBUTES_SYNC_WINDOW_MIN); else ptp->min_synchronisation_ns = DEFAULT_MIN_SYNCHRONISATION_NS; + if (rc == 0 && + out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN) + ptp->capabilities = MCDI_DWORD(outbuf, + PTP_OUT_GET_ATTRIBUTES_CAPABILITIES); + else + ptp->capabilities = 0; + + /* Set up the shift for conversion between frequency + * adjustments in parts-per-billion and the fixed-point + * fractional ns format that the adapter uses. + */ + if (ptp->capabilities & (1 << MC_CMD_PTP_OUT_GET_ATTRIBUTES_FP44_FREQ_ADJ_LBN)) + ptp->adjfreq_ppb_shift = PPB_SHIFT_FP44; + else + ptp->adjfreq_ppb_shift = PPB_SHIFT_FP40; + return 0; } @@ -534,8 +699,9 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_TIMESTAMP_CORRECTIONS_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN); int rc; + size_t out_len; /* Get the timestamp corrections from the NIC. If this operation is * not supported (older NICs) then no correction is required. @@ -545,21 +711,37 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), NULL); + outbuf, sizeof(outbuf), &out_len); if (rc == 0) { - efx->ptp_data->ts_corrections.tx = MCDI_DWORD(outbuf, + efx->ptp_data->ts_corrections.ptp_tx = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT); - efx->ptp_data->ts_corrections.rx = MCDI_DWORD(outbuf, + efx->ptp_data->ts_corrections.ptp_rx = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_RECEIVE); efx->ptp_data->ts_corrections.pps_out = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_OUT); efx->ptp_data->ts_corrections.pps_in = MCDI_DWORD(outbuf, PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_IN); + + if (out_len >= MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN) { + efx->ptp_data->ts_corrections.general_tx = MCDI_DWORD( + outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_TX); + efx->ptp_data->ts_corrections.general_rx = MCDI_DWORD( + outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_RX); + } else { + efx->ptp_data->ts_corrections.general_tx = + efx->ptp_data->ts_corrections.ptp_tx; + efx->ptp_data->ts_corrections.general_rx = + efx->ptp_data->ts_corrections.ptp_rx; + } } else if (rc == -EINVAL) { - efx->ptp_data->ts_corrections.tx = 0; - efx->ptp_data->ts_corrections.rx = 0; + efx->ptp_data->ts_corrections.ptp_tx = 0; + efx->ptp_data->ts_corrections.ptp_rx = 0; efx->ptp_data->ts_corrections.pps_out = 0; efx->ptp_data->ts_corrections.pps_in = 0; + efx->ptp_data->ts_corrections.general_tx = 0; + efx->ptp_data->ts_corrections.general_rx = 0; } else { efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf, sizeof(outbuf), rc); @@ -873,8 +1055,24 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) return rc; } +/* Transmit a PTP packet via the dedicated hardware timestamped queue. */ +static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp_data = efx->ptp_data; + struct efx_tx_queue *tx_queue; + u8 type = skb->ip_summed == CHECKSUM_PARTIAL ? 
EFX_TXQ_TYPE_OFFLOAD : 0; + + tx_queue = &ptp_data->channel->tx_queue[type]; + if (tx_queue && tx_queue->timestamping) { + efx_enqueue_skb(tx_queue, skb); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); + } +} + /* Transmit a PTP packet, via the MCDI interface, to the wire. */ -static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb) +static void efx_ptp_xmit_skb_mc(struct efx_nic *efx, struct sk_buff *skb) { struct efx_ptp_data *ptp_data = efx->ptp_data; struct skb_shared_hwtstamps timestamps; @@ -910,16 +1108,16 @@ static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb) timestamps.hwtstamp = ptp_data->nic_to_kernel_time( MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MAJOR), MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MINOR), - ptp_data->ts_corrections.tx); + ptp_data->ts_corrections.ptp_tx); skb_tstamp_tx(skb, ×tamps); rc = 0; fail: - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); - return rc; + return; } static void efx_ptp_drop_time_expired_events(struct efx_nic *efx) @@ -1189,7 +1387,7 @@ static void efx_ptp_worker(struct work_struct *work) efx_ptp_process_events(efx, &tempq); while ((skb = skb_dequeue(&ptp_data->txq))) - efx_ptp_xmit_skb(efx, skb); + ptp_data->xmit_skb(efx, skb); while ((skb = __skb_dequeue(&tempq))) efx_ptp_process_rx(efx, skb); @@ -1239,6 +1437,14 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) goto fail2; } + if (efx_ptp_use_mac_tx_timestamps(efx)) { + ptp->xmit_skb = efx_ptp_xmit_skb_queue; + /* Request sync events on this channel. */ + channel->sync_events_state = SYNC_EVENTS_QUIESCENT; + } else { + ptp->xmit_skb = efx_ptp_xmit_skb_mc; + } + INIT_WORK(&ptp->work, efx_ptp_worker); ptp->config.flags = 0; ptp->config.tx_type = HWTSTAMP_TX_OFF; @@ -1303,11 +1509,21 @@ fail1: static int efx_ptp_probe_channel(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; + int rc; channel->irq_moderation_us = 0; channel->rx_queue.core_index = 0; - return efx_ptp_probe(efx, channel); + rc = efx_ptp_probe(efx, channel); + /* Failure to probe PTP is not fatal; this channel will just not be + * used for anything. + * In the case of EPERM, efx_ptp_probe will print its own message (in + * efx_ptp_get_attributes()), so we don't need to. + */ + if (rc && rc != -EPERM) + netif_warn(efx, drv, efx->net_dev, + "Failed to probe PTP, rc=%d\n", rc); + return 0; } void efx_ptp_remove(struct efx_nic *efx) @@ -1332,6 +1548,7 @@ void efx_ptp_remove(struct efx_nic *efx) efx_nic_free_buffer(efx, &efx->ptp_data->start); kfree(efx->ptp_data); + efx->ptp_data = NULL; } static void efx_ptp_remove_channel(struct efx_channel *channel) @@ -1548,6 +1765,17 @@ void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info) ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE); + /* Check licensed features. If we don't have the license for TX + * timestamps, the NIC will not support them. 
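+ * (efx_ef10_tx_init() applies the same LICENSED_V3_FEATURES_TX_TIMESTAMPS + * check before enabling timestamping on a TX queue.)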
+ */ + if (efx_ptp_use_mac_tx_timestamps(efx)) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (!(nic_data->licensed_features & + (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) + ts_info->so_timestamping &= + ~SOF_TIMESTAMPING_TX_HARDWARE; + } if (primary && primary->ptp_data && primary->ptp_data->phc_clock) ts_info->phc_index = ptp_clock_index(primary->ptp_data->phc_clock); @@ -1627,7 +1855,7 @@ static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp) evt->hwtimestamp = efx->ptp_data->nic_to_kernel_time( EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA), EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA), - ptp->ts_corrections.rx); + ptp->ts_corrections.ptp_rx); evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); list_add_tail(&evt->link, &ptp->evt_list); @@ -1709,9 +1937,20 @@ void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev) { + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + + /* When extracting the sync timestamp minor value, we should discard + * the least significant two bits. These are not required in order + * to reconstruct full-range timestamps and they are optionally used + * to report status depending on the options supplied when subscribing + * for sync events. + */ channel->sync_timestamp_major = MCDI_EVENT_FIELD(*ev, PTP_TIME_MAJOR); channel->sync_timestamp_minor = - MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_26_19) << 19; + (MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_MS_8BITS) & 0xFC) + << ptp->nic_time.sync_event_minor_shift; + /* if sync events have been disabled then we want to silently ignore * this event, so throw away result. */ @@ -1719,15 +1958,6 @@ void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev) SYNC_EVENTS_VALID); } -/* make some assumptions about the time representation rather than abstract it, - * since we currently only support one type of inline timestamping and only on - * EF10. - */ -#define MINOR_TICKS_PER_SECOND 0x8000000 -/* Fuzz factor for sync events to be out of order with RX events */ -#define FUZZ (MINOR_TICKS_PER_SECOND / 10) -#define EXPECTED_SYNC_EVENTS_PER_SECOND 4 - static inline u32 efx_rx_buf_timestamp_minor(struct efx_nic *efx, const u8 *eh) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) @@ -1745,31 +1975,33 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel, struct sk_buff *skb) { struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; u32 pkt_timestamp_major, pkt_timestamp_minor; u32 diff, carry; struct skb_shared_hwtstamps *timestamps; - pkt_timestamp_minor = (efx_rx_buf_timestamp_minor(efx, - skb_mac_header(skb)) + - (u32) efx->ptp_data->ts_corrections.rx) & - (MINOR_TICKS_PER_SECOND - 1); + if (channel->sync_events_state != SYNC_EVENTS_VALID) + return; + + pkt_timestamp_minor = efx_rx_buf_timestamp_minor(efx, skb_mac_header(skb)); /* get the difference between the packet and sync timestamps, * modulo one second */ - diff = (pkt_timestamp_minor - channel->sync_timestamp_minor) & - (MINOR_TICKS_PER_SECOND - 1); + diff = pkt_timestamp_minor - channel->sync_timestamp_minor; + if (pkt_timestamp_minor < channel->sync_timestamp_minor) + diff += ptp->nic_time.minor_max; + /* do we roll over a second boundary and need to carry the one? */ - carry = channel->sync_timestamp_minor + diff > MINOR_TICKS_PER_SECOND ? + carry = (channel->sync_timestamp_minor >= ptp->nic_time.minor_max - diff) ? 
1 : 0; - if (diff <= MINOR_TICKS_PER_SECOND / EXPECTED_SYNC_EVENTS_PER_SECOND + - FUZZ) { + if (diff <= ptp->nic_time.sync_event_diff_max) { /* packet is ahead of the sync event by a quarter of a second or * less (allowing for fuzz) */ pkt_timestamp_major = channel->sync_timestamp_major + carry; - } else if (diff >= MINOR_TICKS_PER_SECOND - FUZZ) { + } else if (diff >= ptp->nic_time.sync_event_diff_min) { /* packet is behind the sync event but within the fuzz factor. * This means the RX packet and sync event crossed as they were * placed on the event queue, which can sometimes happen. @@ -1791,7 +2023,9 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel, /* attach the timestamps to the skb */ timestamps = skb_hwtstamps(skb); timestamps->hwtstamp = - efx_ptp_s27_to_ktime(pkt_timestamp_major, pkt_timestamp_minor); + ptp->nic_to_kernel_time(pkt_timestamp_major, + pkt_timestamp_minor, + ptp->ts_corrections.general_rx); } static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) @@ -1809,9 +2043,10 @@ static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) else if (delta < -MAX_PPB) delta = -MAX_PPB; - /* Convert ppb to fixed point ns. */ - adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >> - (PPB_EXTRA_BITS + MAX_PPB_BITS)); + /* Convert ppb to fixed point ns taking care to round correctly. */ + adjustment_ns = ((s64)delta * PPB_SCALE_WORD + + (1 << (ptp_data->adjfreq_ppb_shift - 1))) >> + ptp_data->adjfreq_ppb_shift; MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); MCDI_SET_DWORD(inadj, PTP_IN_PERIPH_ID, 0); @@ -1911,13 +2146,14 @@ static int efx_phc_enable(struct ptp_clock_info *ptp, return 0; } -static const struct efx_channel_type efx_ptp_channel_type = { +const struct efx_channel_type efx_ptp_channel_type = { .handle_no_channel = efx_ptp_handle_no_channel, .pre_probe = efx_ptp_probe_channel, .post_remove = efx_ptp_remove_channel, .get_name = efx_ptp_get_channel_name, /* no copy operation; there is no need to reallocate this channel */ .receive_skb = efx_ptp_rx, + .want_txqs = efx_ptp_want_txqs, .keep_eventq = false, }; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 9937a2450e57..cece961f2e82 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -77,9 +77,23 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, } if (buffer->flags & EFX_TX_BUF_SKB) { + struct sk_buff *skb = (struct sk_buff *)buffer->skb; + EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); (*pkts_compl)++; - (*bytes_compl) += buffer->skb->len; + (*bytes_compl) += skb->len; + if (tx_queue->timestamping && + (tx_queue->completed_timestamp_major || + tx_queue->completed_timestamp_minor)) { + struct skb_shared_hwtstamps hwtstamp; + + hwtstamp.hwtstamp = + efx_ptp_nic_to_kernel_time(tx_queue); + skb_tstamp_tx(skb, &hwtstamp); + + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + } dev_consume_skb_any((struct sk_buff *)buffer->skb); netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", @@ -828,6 +842,11 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; tx_queue->xmit_more_available = false; + tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && + tx_queue->channel == efx_ptp_channel(efx)); + tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; /* Set 
up default function pointers. These may get replaced by * efx_nic_init_tx() based off NIC/queue capabilities. diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index ce2ea2d491ac..2ffe76c0ff74 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -474,7 +474,7 @@ struct mac_device_info; /* Helpers to program the MAC core */ struct stmmac_ops { /* MAC core initialization */ - void (*core_init)(struct mac_device_info *hw, int mtu); + void (*core_init)(struct mac_device_info *hw, struct net_device *dev); /* Enable the MAC RX/TX */ void (*set_mac)(void __iomem *ioaddr, bool enable); /* Enable and verify that the IPC module is supported */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 9eb7f65d8000..a3fa65b1ca8e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -483,7 +483,8 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) return 0; } -static void sun8i_dwmac_core_init(struct mac_device_info *hw, int mtu) +static void sun8i_dwmac_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 v; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 8a86340ff2d3..540d21786a43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -25,18 +25,28 @@ #include <linux/crc32.h> #include <linux/slab.h> #include <linux/ethtool.h> +#include <net/dsa.h> #include <asm/io.h> #include "stmmac_pcs.h" #include "dwmac1000.h" -static void dwmac1000_core_init(struct mac_device_info *hw, int mtu) +static void dwmac1000_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONTROL); + int mtu = dev->mtu; /* Configure GMAC core */ value |= GMAC_CORE_INIT; + /* Clear ACS bit because Ethernet switch tagging formats such as + * Broadcom tags can look like invalid LLC/SNAP packets and cause the + * hardware to truncate packets on reception. + */ + if (netdev_uses_dsa(dev)) + value &= ~GMAC_CONTROL_ACS; + if (mtu > 1500) value |= GMAC_CONTROL_2K; if (mtu > 2000) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 8ef517356313..91b23f9db31a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -25,15 +25,26 @@ *******************************************************************************/ #include <linux/crc32.h> +#include <net/dsa.h> #include <asm/io.h> #include "dwmac100.h" -static void dwmac100_core_init(struct mac_device_info *hw, int mtu) +static void dwmac100_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + MAC_CONTROL); - writel((value | MAC_CORE_INIT), ioaddr + MAC_CONTROL); + value |= MAC_CORE_INIT; + + /* Clear ASTP bit because Ethernet switch tagging formats such as + * Broadcom tags can look like invalid LLC/SNAP packets and cause the + * hardware to truncate packets on reception. 
+ */ + if (netdev_uses_dsa(dev)) + value &= ~MAC_CONTROL_ASTP; + + writel(value, ioaddr + MAC_CONTROL); #ifdef STMMAC_VLAN_TAG_USED writel(ETH_P_8021Q, ioaddr + MAC_VLAN1); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f3ed8f7853eb..ed222b20fcf1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -17,16 +17,26 @@ #include <linux/slab.h> #include <linux/ethtool.h> #include <linux/io.h> +#include <net/dsa.h> #include "stmmac_pcs.h" #include "dwmac4.h" -static void dwmac4_core_init(struct mac_device_info *hw, int mtu) +static void dwmac4_core_init(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONFIG); + int mtu = dev->mtu; value |= GMAC_CORE_INIT; + /* Clear ACS bit because Ethernet switch tagging formats such as + * Broadcom tags can look like invalid LLC/SNAP packets and cause the + * hardware to truncate packets on reception. + */ + if (netdev_uses_dsa(dev)) + value &= ~GMAC_CONFIG_ACS; + if (mtu > 1500) value |= GMAC_CONFIG_2K; if (mtu > 2000) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 2fd8456999f6..c728ffa095de 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -334,7 +334,7 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, if (tx_own) tdes3 |= TDES3_OWN; - if (is_fs & tx_own) + if (is_fs && tx_own) /* When the own bit, for the first frame, has to be set, all * descriptors for the same frame have to be set before, to * avoid a race condition. @@ -377,7 +377,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs, if (tx_own) tdes3 |= TDES3_OWN; - if (is_fs & tx_own) + if (is_fs && tx_own) /* When the own bit, for the first frame, has to be set, all * descriptors for the same frame have to be set before, to * avoid a race condition. diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index b47cb5c4da51..6768a25b6aa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -341,7 +341,7 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, if (tx_own) tdes0 |= ETDES0_OWN; - if (is_fs & tx_own) + if (is_fs && tx_own) /* When the own bit, for the first frame, has to be set, all * descriptors for the same frame have to be set before, to * avoid a race condition. diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f99f14c35063..7ad841434ec8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2527,7 +2527,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } /* Initialize the MAC Core */ - priv->hw->mac->core_init(priv->hw, dev->mtu); + priv->hw->mac->core_init(priv->hw, dev); /* Initialize MTL */ if (priv->synopsys_id >= DWMAC_CORE_4_00) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index ed58c746e4af..f5a7eb22d0f5 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -715,7 +715,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) /* warning!!!! 
We are retrieving the virtual ptr in the sw_data * field as a 32bit value. Will not work on 64bit machines */ - page = (struct page *)GET_SW_DATA0(desc); + page = (struct page *)GET_SW_DATA0(ndesc); if (likely(dma_buff && buf_len && page)) { dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 91a67c5297f7..c3ca191fea7f 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1221,7 +1221,6 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct ndis_recv_scale_cap rsscap; u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); u32 mtu, size; - const struct cpumask *node_cpu_mask; u32 num_possible_rss_qs; int i, ret; @@ -1290,14 +1289,8 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, if (ret || rsscap.num_recv_que < 2) goto out; - /* - * We will limit the VRSS channels to the number CPUs in the NUMA node - * the primary channel is currently bound to. - * - * This also guarantees that num_possible_rss_qs <= num_online_cpus - */ - node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu)); - num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask), + /* This guarantees that num_possible_rss_qs <= num_online_cpus */ + num_possible_rss_qs = min_t(u32, num_online_cpus(), rsscap.num_recv_que); net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f522715c6595..7de88b33d5b9 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -396,8 +396,6 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define MACSEC_GCM_AES_128_SAK_LEN 16 #define MACSEC_GCM_AES_256_SAK_LEN 32 -#define MAX_SAK_LEN MACSEC_GCM_AES_256_SAK_LEN - #define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false @@ -1605,7 +1603,7 @@ static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, - .len = MAX_SAK_LEN, }, + .len = MACSEC_MAX_KEY_LEN, }, }; static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa) @@ -2374,7 +2372,7 @@ static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb) switch (secy->key_len) { case MACSEC_GCM_AES_128_SAK_LEN: - csid = MACSEC_CIPHER_ID_GCM_AES_128; + csid = MACSEC_DEFAULT_CIPHER_ID; break; case MACSEC_GCM_AES_256_SAK_LEN: csid = MACSEC_CIPHER_ID_GCM_AES_256; @@ -3076,7 +3074,7 @@ static int macsec_changelink_common(struct net_device *dev, if (data[IFLA_MACSEC_CIPHER_SUITE]) { switch (nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE])) { case MACSEC_CIPHER_ID_GCM_AES_128: - case MACSEC_DEFAULT_CIPHER_ALT: + case MACSEC_DEFAULT_CIPHER_ID: secy->key_len = MACSEC_GCM_AES_128_SAK_LEN; break; case MACSEC_CIPHER_ID_GCM_AES_256: @@ -3355,7 +3353,7 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], switch (csid) { case MACSEC_CIPHER_ID_GCM_AES_128: case MACSEC_CIPHER_ID_GCM_AES_256: - case MACSEC_DEFAULT_CIPHER_ALT: + case MACSEC_DEFAULT_CIPHER_ID: if (icv_len < MACSEC_MIN_ICV_LEN || icv_len > MACSEC_STD_ICV_LEN) return -EINVAL; @@ -3428,7 +3426,7 @@ static int macsec_fill_info(struct sk_buff *skb, switch (secy->key_len) { case MACSEC_GCM_AES_128_SAK_LEN: - csid = MACSEC_CIPHER_ID_GCM_AES_128; + csid = MACSEC_DEFAULT_CIPHER_ID; break; case MACSEC_GCM_AES_256_SAK_LEN: csid = 
MACSEC_CIPHER_ID_GCM_AES_256; diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 074ddebbc41d..09388c06171d 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -4,4 +4,8 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ netdev.o \ - bpf.o \ + +ifeq ($(CONFIG_BPF_SYSCALL),y) +netdevsim-objs += \ + bpf.o +endif diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 5134d5c1306c..8166f121bbcc 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -17,11 +17,15 @@ #include <linux/bpf_verifier.h> #include <linux/debugfs.h> #include <linux/kernel.h> +#include <linux/mutex.h> #include <linux/rtnetlink.h> #include <net/pkt_cls.h> #include "netdevsim.h" +#define pr_vlog(env, fmt, ...) \ + bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__) + struct nsim_bpf_bound_prog { struct netdevsim *ns; struct bpf_prog *prog; @@ -31,6 +35,19 @@ struct nsim_bpf_bound_prog { struct list_head l; }; +#define NSIM_BPF_MAX_KEYS 2 + +struct nsim_bpf_bound_map { + struct netdevsim *ns; + struct bpf_offloaded_map *map; + struct mutex mutex; + struct nsim_map_entry { + void *key; + void *value; + } entry[NSIM_BPF_MAX_KEYS]; + struct list_head l; +}; + static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data) { const char **str = file->private; @@ -63,6 +80,9 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) if (state->ns->bpf_bind_verifier_delay && !insn_idx) msleep(state->ns->bpf_bind_verifier_delay); + if (insn_idx == env->prog->len - 1) + pr_vlog(env, "Hello from netdevsim!\n"); + return 0; } @@ -109,17 +129,35 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, struct netdevsim *ns = cb_priv; struct bpf_prog *oldprog; - if (type != TC_SETUP_CLSBPF || - !tc_can_offload(ns->netdev) || - cls_bpf->common.protocol != htons(ETH_P_ALL) || - cls_bpf->common.chain_index) + if (type != TC_SETUP_CLSBPF) { + NSIM_EA(cls_bpf->common.extack, + "only offload of BPF classifiers supported"); + return -EOPNOTSUPP; + } + + if (!tc_can_offload_extack(ns->netdev, cls_bpf->common.extack)) return -EOPNOTSUPP; - if (!ns->bpf_tc_accept) + if (cls_bpf->common.protocol != htons(ETH_P_ALL)) { + NSIM_EA(cls_bpf->common.extack, + "only ETH_P_ALL supported as filter protocol"); return -EOPNOTSUPP; + } + + if (cls_bpf->common.chain_index) + return -EOPNOTSUPP; + + if (!ns->bpf_tc_accept) { + NSIM_EA(cls_bpf->common.extack, + "netdevsim configured to reject BPF TC offload"); + return -EOPNOTSUPP; + } /* Note: progs without skip_sw will probably not be dev bound */ - if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) + if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) { + NSIM_EA(cls_bpf->common.extack, + "netdevsim configured to reject unbound programs"); return -EOPNOTSUPP; + } if (cls_bpf->command != TC_CLSBPF_OFFLOAD) return -EOPNOTSUPP; @@ -131,8 +169,11 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, oldprog = NULL; if (!cls_bpf->prog) return 0; - if (ns->bpf_offloaded) + if (ns->bpf_offloaded) { + NSIM_EA(cls_bpf->common.extack, + "driver and netdev offload states mismatch"); return -EBUSY; + } } return nsim_bpf_offload(ns, cls_bpf->prog, oldprog); @@ -284,6 +325,224 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) return 0; } +static bool +nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key) +{ + return e->key && !memcmp(key, e->key, map->key_size); +} + +static int 
nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) + if (nsim_map_key_match(&offmap->map, &nmap->entry[i], key)) + return i; + + return -ENOENT; +} + +static int +nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + + nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); + if (!nmap->entry[idx].key) + return -ENOMEM; + nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); + if (!nmap->entry[idx].value) { + kfree(nmap->entry[idx].key); + nmap->entry[idx].key = NULL; + return -ENOMEM; + } + + return 0; +} + +static int +nsim_map_get_next_key(struct bpf_offloaded_map *offmap, + void *key, void *next_key) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx = -ENOENT; + + mutex_lock(&nmap->mutex); + + if (key) + idx = nsim_map_key_find(offmap, key); + if (idx == -ENOENT) + idx = 0; + else + idx++; + + for (; idx < ARRAY_SIZE(nmap->entry); idx++) { + if (nmap->entry[idx].key) { + memcpy(next_key, nmap->entry[idx].key, + offmap->map.key_size); + break; + } + } + + mutex_unlock(&nmap->mutex); + + if (idx == ARRAY_SIZE(nmap->entry)) + return -ENOENT; + return 0; +} + +static int +nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx; + + mutex_lock(&nmap->mutex); + + idx = nsim_map_key_find(offmap, key); + if (idx >= 0) + memcpy(value, nmap->entry[idx].value, offmap->map.value_size); + + mutex_unlock(&nmap->mutex); + + return idx < 0 ? idx : 0; +} + +static int +nsim_map_update_elem(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx, err = 0; + + mutex_lock(&nmap->mutex); + + idx = nsim_map_key_find(offmap, key); + if (idx < 0 && flags == BPF_EXIST) { + err = idx; + goto exit_unlock; + } + if (idx >= 0 && flags == BPF_NOEXIST) { + err = -EEXIST; + goto exit_unlock; + } + + if (idx < 0) { + for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++) + if (!nmap->entry[idx].key) + break; + if (idx == ARRAY_SIZE(nmap->entry)) { + err = -E2BIG; + goto exit_unlock; + } + + err = nsim_map_alloc_elem(offmap, idx); + if (err) + goto exit_unlock; + } + + memcpy(nmap->entry[idx].key, key, offmap->map.key_size); + memcpy(nmap->entry[idx].value, value, offmap->map.value_size); +exit_unlock: + mutex_unlock(&nmap->mutex); + + return err; +} + +static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx; + + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) + return -EINVAL; + + mutex_lock(&nmap->mutex); + + idx = nsim_map_key_find(offmap, key); + if (idx >= 0) { + kfree(nmap->entry[idx].key); + kfree(nmap->entry[idx].value); + memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx])); + } + + mutex_unlock(&nmap->mutex); + + return idx < 0 ? 
idx : 0;
+}
+
+static const struct bpf_map_dev_ops nsim_bpf_map_ops = {
+	.map_get_next_key	= nsim_map_get_next_key,
+	.map_lookup_elem	= nsim_map_lookup_elem,
+	.map_update_elem	= nsim_map_update_elem,
+	.map_delete_elem	= nsim_map_delete_elem,
+};
+
+static int
+nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
+{
+	struct nsim_bpf_bound_map *nmap;
+	unsigned int i;
+	int err;
+
+	if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY &&
+		    offmap->map.map_type != BPF_MAP_TYPE_HASH))
+		return -EINVAL;
+	if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS)
+		return -ENOMEM;
+	if (offmap->map.map_flags)
+		return -EINVAL;
+
+	nmap = kzalloc(sizeof(*nmap), GFP_USER);
+	if (!nmap)
+		return -ENOMEM;
+
+	offmap->dev_priv = nmap;
+	nmap->ns = ns;
+	nmap->map = offmap;
+	mutex_init(&nmap->mutex);
+
+	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) {
+		for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) {
+			u32 *key;
+
+			err = nsim_map_alloc_elem(offmap, i);
+			if (err)
+				goto err_free;
+			key = nmap->entry[i].key;
+			*key = i;
+		}
+	}
+
+	offmap->dev_ops = &nsim_bpf_map_ops;
+	list_add_tail(&nmap->l, &ns->bpf_bound_maps);
+
+	return 0;
+
+err_free:
+	/* walk back over the entries [0, i) that were allocated */
+	while (i--) {
+		kfree(nmap->entry[i].key);
+		kfree(nmap->entry[i].value);
+	}
+	kfree(nmap);
+	return err;
+}
+
+static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) {
+		kfree(nmap->entry[i].key);
+		kfree(nmap->entry[i].value);
+	}
+	list_del_init(&nmap->l);
+	mutex_destroy(&nmap->mutex);
+	kfree(nmap);
+}
+
 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 {
 	struct netdevsim *ns = netdev_priv(dev);
@@ -328,6 +587,14 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 			return err;
 
 		return nsim_xdp_set_prog(ns, bpf);
+	case BPF_OFFLOAD_MAP_ALLOC:
+		if (!ns->bpf_map_accept)
+			return -EOPNOTSUPP;
+
+		return nsim_bpf_map_alloc(ns, bpf->offmap);
+	case BPF_OFFLOAD_MAP_FREE:
+		nsim_bpf_map_free(bpf->offmap);
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -336,6 +603,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 int nsim_bpf_init(struct netdevsim *ns)
 {
 	INIT_LIST_HEAD(&ns->bpf_bound_progs);
+	INIT_LIST_HEAD(&ns->bpf_bound_maps);
 
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
@@ -362,12 +630,17 @@ int nsim_bpf_init(struct netdevsim *ns)
 	debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
 			    &ns->bpf_xdpoffload_accept);
 
+	ns->bpf_map_accept = true;
+	debugfs_create_bool("bpf_map_accept", 0600, ns->ddir,
+			    &ns->bpf_map_accept);
+
 	return 0;
 }
 
 void nsim_bpf_uninit(struct netdevsim *ns)
 {
 	WARN_ON(!list_empty(&ns->bpf_bound_progs));
+	WARN_ON(!list_empty(&ns->bpf_bound_maps));
 	WARN_ON(ns->xdp_prog);
 	WARN_ON(ns->bpf_offloaded);
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 32270de9395a..ea081c10efb8 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -61,16 +61,47 @@ struct netdevsim {
 	bool bpf_tc_non_bound_accept;
 	bool bpf_xdpdrv_accept;
 	bool bpf_xdpoffload_accept;
+
+	bool bpf_map_accept;
+	struct list_head bpf_bound_maps;
 };
 
 extern struct dentry *nsim_ddir;
 
+#ifdef CONFIG_BPF_SYSCALL
 int nsim_bpf_init(struct netdevsim *ns);
 void nsim_bpf_uninit(struct netdevsim *ns);
 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf);
 int nsim_bpf_disable_tc(struct netdevsim *ns);
 int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			       void *cb_priv);
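+/* Annotation of the fallback branch below: with CONFIG_BPF_SYSCALL
+ * disabled, these static inline stubs keep netdevsim buildable. Init,
+ * uninit and disable_tc collapse to no-ops, nsim_bpf() succeeds only
+ * for XDP_QUERY_PROG (reporting that no program is attached) and
+ * rejects every other command, and the TC block callback refuses
+ * offload requests with -EOPNOTSUPP.
+ */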
+#else +static inline int nsim_bpf_init(struct netdevsim *ns) +{ + return 0; +} + +static inline void nsim_bpf_uninit(struct netdevsim *ns) +{ +} + +static inline int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) +{ + return bpf->command == XDP_QUERY_PROG ? 0 : -EOPNOTSUPP; +} + +static inline int nsim_bpf_disable_tc(struct netdevsim *ns) +{ + return 0; +} + +static inline int +nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + return -EOPNOTSUPP; +} +#endif static inline struct netdevsim *to_nsim(struct device *ptr) { diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index bdc4bb3c8288..8961209ee949 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(sfp_upstream_stop); /** * sfp_register_upstream() - Register the neighbouring device - * @np: device node for the SFP bus + * @fwnode: firmware node for the SFP bus * @ndev: network device associated with the interface * @upstream: the upstream private data * @ops: the upstream's &struct sfp_upstream_ops diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4e1da1645b15..5aa59f41bf8c 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, struct pppoe_hdr *ph; struct net_device *dev; char *start; + int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { @@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, if (total_len > (dev->mtu + dev->hard_header_len)) goto end; - - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, - 0, GFP_KERNEL); + hlen = LL_RESERVED_SPACE(dev); + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; @@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) /* Copy the data if there is no space for the header or if it's * read-only. 
*/ - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 170a3e89b5af..a0c5cb1a1617 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -679,6 +679,15 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } +static void tun_cleanup_tx_ring(struct tun_file *tfile) +{ + if (tfile->tx_ring.queue) { + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); + xdp_rxq_info_unreg(&tfile->xdp_rxq); + memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); + } +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -725,10 +734,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) { - ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - xdp_rxq_info_unreg(&tfile->xdp_rxq); - } + tun_cleanup_tx_ring(tfile); sock_put(&tfile->sk); } } @@ -768,14 +774,14 @@ static void tun_detach_all(struct net_device *dev) tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); - xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); + tun_cleanup_tx_ring(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); - xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); + tun_cleanup_tx_ring(tfile); } BUG_ON(tun->numdisabled != 0); @@ -2217,7 +2223,8 @@ static void tun_prog_free(struct rcu_head *rcu) kfree(prog); } -static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p, +static int __tun_set_ebpf(struct tun_struct *tun, + struct tun_prog __rcu **prog_p, struct bpf_prog *prog) { struct tun_prog *old, *new = NULL; @@ -3145,6 +3152,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); + return 0; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d51d9abf7986..0657203ffb91 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -606,6 +606,7 @@ enum rtl8152_flags { PHY_RESET, SCHEDULE_NAPI, GREEN_ETHERNET, + DELL_TB_RX_AGG_BUG, }; /* Define these values to match your device */ @@ -1798,6 +1799,9 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) dev_kfree_skb_any(skb); remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head); + + if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags)) + break; } if (!skb_queue_empty(&skb_head)) { @@ -4133,6 +4137,9 @@ static void r8153_init(struct r8152 *tp) /* rx aggregation */ ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); + if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags)) + ocp_data |= RX_AGG_DISABLE; + ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); rtl_tally_reset(tp); @@ -5207,6 +5214,12 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } + if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && + udev->serial && !strcmp(udev->serial, "000001000000")) { + dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); + set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); + } + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index d56fe32bf48d..8a22ff67b026 100644 --- 
a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -457,12 +457,10 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb, void usbnet_defer_kevent (struct usbnet *dev, int work) { set_bit (work, &dev->flags); - if (!schedule_work (&dev->kevent)) { - if (net_ratelimit()) - netdev_err(dev->net, "kevent %d may have been dropped\n", work); - } else { + if (!schedule_work (&dev->kevent)) + netdev_dbg(dev->net, "kevent %d may have been dropped\n", work); + else netdev_dbg(dev->net, "kevent %d scheduled\n", work); - } } EXPORT_SYMBOL_GPL(usbnet_defer_kevent); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 12dfc5fee58e..626c27352ae2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -66,16 +66,39 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_UFO }; -struct virtnet_stats { - struct u64_stats_sync tx_syncp; - struct u64_stats_sync rx_syncp; - u64 tx_bytes; - u64 tx_packets; - - u64 rx_bytes; - u64 rx_packets; +struct virtnet_stat_desc { + char desc[ETH_GSTRING_LEN]; + size_t offset; }; +struct virtnet_sq_stats { + struct u64_stats_sync syncp; + u64 packets; + u64 bytes; +}; + +struct virtnet_rq_stats { + struct u64_stats_sync syncp; + u64 packets; + u64 bytes; +}; + +#define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) +#define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) + +static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { + { "packets", VIRTNET_SQ_STAT(packets) }, + { "bytes", VIRTNET_SQ_STAT(bytes) }, +}; + +static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { + { "packets", VIRTNET_RQ_STAT(packets) }, + { "bytes", VIRTNET_RQ_STAT(bytes) }, +}; + +#define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) +#define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) + /* Internal representation of a send virtqueue */ struct send_queue { /* Virtqueue associated with this send _queue */ @@ -87,6 +110,8 @@ struct send_queue { /* Name of the send queue: output.$index */ char name[40]; + struct virtnet_sq_stats stats; + struct napi_struct napi; }; @@ -99,6 +124,8 @@ struct receive_queue { struct bpf_prog __rcu *xdp_prog; + struct virtnet_rq_stats stats; + /* Chain pages by the private ptr. */ struct page *pages; @@ -152,9 +179,6 @@ struct virtnet_info { /* Packet virtio header size */ u8 hdr_len; - /* Active statistics */ - struct virtnet_stats __percpu *stats; - /* Work struct for refilling if we run low on memory. 
*/ struct delayed_work refill; @@ -1127,7 +1151,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) struct virtnet_info *vi = rq->vq->vdev->priv; unsigned int len, received = 0, bytes = 0; void *buf; - struct virtnet_stats *stats = this_cpu_ptr(vi->stats); if (!vi->big_packets || vi->mergeable_rx_bufs) { void *ctx; @@ -1150,10 +1173,10 @@ static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) schedule_delayed_work(&vi->refill, 0); } - u64_stats_update_begin(&stats->rx_syncp); - stats->rx_bytes += bytes; - stats->rx_packets += received; - u64_stats_update_end(&stats->rx_syncp); + u64_stats_update_begin(&rq->stats.syncp); + rq->stats.bytes += bytes; + rq->stats.packets += received; + u64_stats_update_end(&rq->stats.syncp); return received; } @@ -1162,8 +1185,6 @@ static void free_old_xmit_skbs(struct send_queue *sq) { struct sk_buff *skb; unsigned int len; - struct virtnet_info *vi = sq->vq->vdev->priv; - struct virtnet_stats *stats = this_cpu_ptr(vi->stats); unsigned int packets = 0; unsigned int bytes = 0; @@ -1182,10 +1203,10 @@ static void free_old_xmit_skbs(struct send_queue *sq) if (!packets) return; - u64_stats_update_begin(&stats->tx_syncp); - stats->tx_bytes += bytes; - stats->tx_packets += packets; - u64_stats_update_end(&stats->tx_syncp); + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; + u64_stats_update_end(&sq->stats.syncp); } static void virtnet_poll_cleantx(struct receive_queue *rq) @@ -1474,24 +1495,25 @@ static void virtnet_stats(struct net_device *dev, struct rtnl_link_stats64 *tot) { struct virtnet_info *vi = netdev_priv(dev); - int cpu; unsigned int start; + int i; - for_each_possible_cpu(cpu) { - struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu); + for (i = 0; i < vi->max_queue_pairs; i++) { u64 tpackets, tbytes, rpackets, rbytes; + struct receive_queue *rq = &vi->rq[i]; + struct send_queue *sq = &vi->sq[i]; do { - start = u64_stats_fetch_begin_irq(&stats->tx_syncp); - tpackets = stats->tx_packets; - tbytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start)); + start = u64_stats_fetch_begin_irq(&sq->stats.syncp); + tpackets = sq->stats.packets; + tbytes = sq->stats.bytes; + } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); do { - start = u64_stats_fetch_begin_irq(&stats->rx_syncp); - rpackets = stats->rx_packets; - rbytes = stats->rx_bytes; - } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start)); + start = u64_stats_fetch_begin_irq(&rq->stats.syncp); + rpackets = rq->stats.packets; + rbytes = rq->stats.bytes; + } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; @@ -1829,6 +1851,83 @@ static int virtnet_set_channels(struct net_device *dev, return err; } +static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct virtnet_info *vi = netdev_priv(dev); + char *p = (char *)data; + unsigned int i, j; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < vi->curr_queue_pairs; i++) { + for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { + snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s", + i, virtnet_rq_stats_desc[j].desc); + p += ETH_GSTRING_LEN; + } + } + + for (i = 0; i < vi->curr_queue_pairs; i++) { + for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { + snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s", + i, virtnet_sq_stats_desc[j].desc); + p += ETH_GSTRING_LEN; + } + } + break; + } +} + +static int 
virtnet_get_sset_count(struct net_device *dev, int sset) +{ + struct virtnet_info *vi = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + + VIRTNET_SQ_STATS_LEN); + default: + return -EOPNOTSUPP; + } +} + +static void virtnet_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct virtnet_info *vi = netdev_priv(dev); + unsigned int idx = 0, start, i, j; + const u8 *stats_base; + size_t offset; + + for (i = 0; i < vi->curr_queue_pairs; i++) { + struct receive_queue *rq = &vi->rq[i]; + + stats_base = (u8 *)&rq->stats; + do { + start = u64_stats_fetch_begin_irq(&rq->stats.syncp); + for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { + offset = virtnet_rq_stats_desc[j].offset; + data[idx + j] = *(u64 *)(stats_base + offset); + } + } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); + idx += VIRTNET_RQ_STATS_LEN; + } + + for (i = 0; i < vi->curr_queue_pairs; i++) { + struct send_queue *sq = &vi->sq[i]; + + stats_base = (u8 *)&sq->stats; + do { + start = u64_stats_fetch_begin_irq(&sq->stats.syncp); + for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { + offset = virtnet_sq_stats_desc[j].offset; + data[idx + j] = *(u64 *)(stats_base + offset); + } + } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); + idx += VIRTNET_SQ_STATS_LEN; + } +} + static void virtnet_get_channels(struct net_device *dev, struct ethtool_channels *channels) { @@ -1928,6 +2027,9 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, + .get_strings = virtnet_get_strings, + .get_sset_count = virtnet_get_sset_count, + .get_ethtool_stats = virtnet_get_ethtool_stats, .set_channels = virtnet_set_channels, .get_channels = virtnet_get_channels, .get_ts_info = ethtool_op_get_ts_info, @@ -2420,6 +2522,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); + + u64_stats_init(&vi->rq[i].stats.syncp); + u64_stats_init(&vi->sq[i].stats.syncp); } return 0; @@ -2544,7 +2649,7 @@ static int virtnet_validate(struct virtio_device *vdev) static int virtnet_probe(struct virtio_device *vdev) { - int i, err; + int i, err = -ENOMEM; struct net_device *dev; struct virtnet_info *vi; u16 max_queue_pairs; @@ -2621,17 +2726,6 @@ static int virtnet_probe(struct virtio_device *vdev) vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; - vi->stats = alloc_percpu(struct virtnet_stats); - err = -ENOMEM; - if (vi->stats == NULL) - goto free; - - for_each_possible_cpu(i) { - struct virtnet_stats *virtnet_stats; - virtnet_stats = per_cpu_ptr(vi->stats, i); - u64_stats_init(&virtnet_stats->tx_syncp); - u64_stats_init(&virtnet_stats->rx_syncp); - } INIT_WORK(&vi->config_work, virtnet_config_changed_work); @@ -2668,7 +2762,7 @@ static int virtnet_probe(struct virtio_device *vdev) */ dev_err(&vdev->dev, "device MTU appears to have changed " "it is now %d < %d", mtu, dev->min_mtu); - goto free_stats; + goto free; } dev->mtu = mtu; @@ -2692,7 +2786,7 @@ static int virtnet_probe(struct virtio_device *vdev) /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) - goto free_stats; + goto free; #ifdef CONFIG_SYSFS if (vi->mergeable_rx_bufs) @@ -2747,8 +2841,6 @@ free_vqs: cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); 
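 	/* The per-queue counters added above are embedded in struct
 	 * send_queue and struct receive_queue, so their storage is released
 	 * together with the queues; that is why the separate free_stats
 	 * unwind step below drops out of this error path.
 	 */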
virtnet_del_vqs(vi); -free_stats: - free_percpu(vi->stats); free: free_netdev(dev); return err; @@ -2781,7 +2873,6 @@ static void virtnet_remove(struct virtio_device *vdev) remove_vq_common(vi); - free_percpu(vi->stats); free_netdev(vi->dev); } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index d1c7029ded7c..cf95290b160c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, rq->rx_ring[i].basePA); rq->rx_ring[i].base = NULL; } - rq->buf_info[i] = NULL; } if (rq->data_ring.base) { @@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, (rq->rx_ring[0].size + rq->rx_ring[1].size); dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], rq->buf_info_pa); + rq->buf_info[0] = rq->buf_info[1] = NULL; } } diff --git a/drivers/net/wireless/ath/ath10k/ce.h b/drivers/net/wireless/ath/ath10k/ce.h index 06ac2eb70bf5..2c3c8f5e90ea 100644 --- a/drivers/net/wireless/ath/ath10k/ce.h +++ b/drivers/net/wireless/ath/ath10k/ce.h @@ -321,6 +321,7 @@ struct ath10k_ce_ops { dma_addr_t buffer, u32 nbytes, u32 transfer_id, u32 flags); }; + static inline u32 ath10k_ce_base_address(struct ath10k *ar, unsigned int ce_id) { return CE0_BASE_ADDRESS + (CE1_BASE_ADDRESS - CE0_BASE_ADDRESS) * ce_id; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index fe9341c97f31..b0fdc1023619 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1276,7 +1276,10 @@ static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, len -= sizeof(*hdr); data = hdr->data; - if (len < ALIGN(ie_len, 4)) { + /* jump over the padding */ + ie_len = ALIGN(ie_len, 4); + + if (len < ie_len) { ath10k_err(ar, "invalid length for board ie_id %d ie_len %zu len %zu\n", ie_id, ie_len, len); ret = -EINVAL; @@ -1315,9 +1318,6 @@ static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, goto out; } - /* jump over the padding */ - ie_len = ALIGN(ie_len, 4); - len -= ie_len; data += ie_len; } @@ -1448,6 +1448,9 @@ int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, len -= sizeof(*hdr); data += sizeof(*hdr); + /* jump over the padding */ + ie_len = ALIGN(ie_len, 4); + if (len < ie_len) { ath10k_err(ar, "invalid length for FW IE %d (%zu < %zu)\n", ie_id, len, ie_len); @@ -1553,9 +1556,6 @@ int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, break; } - /* jump over the padding */ - ie_len = ALIGN(ie_len, 4); - len -= ie_len; data += ie_len; } diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 360c71b106d7..8cc2a8b278e4 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -1856,6 +1856,7 @@ struct ath10k_htt_rx_ops { void* (*htt_get_vaddr_ring)(struct ath10k_htt *htt); void (*htt_reset_paddrs_ring)(struct ath10k_htt *htt, int idx); }; + #define RX_HTT_HDR_STATUS_LEN 64 /* This structure layout is programmed via rx ring setup diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 8abaccc25227..355db6a0fcf3 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1478,13 +1478,10 @@ static int ath10k_pci_dump_memory_section(struct ath10k *ar, if (!mem_region || !buf) return 0; - if (mem_region->section_table.size < 0) - return 0; - cur_section = 
&mem_region->section_table.sections[0]; if (mem_region->start > cur_section->start) { - ath10k_warn(ar, "incorrect memdump region 0x%x with section start addrress 0x%x.\n", + ath10k_warn(ar, "incorrect memdump region 0x%x with section start address 0x%x.\n", mem_region->start, cur_section->start); return 0; } diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 8c5c2dd8fa7f..cd0f023ccf77 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -922,6 +922,7 @@ static void ath9k_hw_init_interrupt_masks(struct ath_hw *ah, AR_IMR_RXERR | AR_IMR_RXORN | AR_IMR_BCNMISC; + u32 msi_cfg = 0; if (AR_SREV_9340(ah) || AR_SREV_9550(ah) || AR_SREV_9531(ah) || AR_SREV_9561(ah)) @@ -929,22 +930,30 @@ static void ath9k_hw_init_interrupt_masks(struct ath_hw *ah, if (AR_SREV_9300_20_OR_LATER(ah)) { imr_reg |= AR_IMR_RXOK_HP; - if (ah->config.rx_intr_mitigation) + if (ah->config.rx_intr_mitigation) { imr_reg |= AR_IMR_RXINTM | AR_IMR_RXMINTR; - else + msi_cfg |= AR_INTCFG_MSI_RXINTM | AR_INTCFG_MSI_RXMINTR; + } else { imr_reg |= AR_IMR_RXOK_LP; - + msi_cfg |= AR_INTCFG_MSI_RXOK; + } } else { - if (ah->config.rx_intr_mitigation) + if (ah->config.rx_intr_mitigation) { imr_reg |= AR_IMR_RXINTM | AR_IMR_RXMINTR; - else + msi_cfg |= AR_INTCFG_MSI_RXINTM | AR_INTCFG_MSI_RXMINTR; + } else { imr_reg |= AR_IMR_RXOK; + msi_cfg |= AR_INTCFG_MSI_RXOK; + } } - if (ah->config.tx_intr_mitigation) + if (ah->config.tx_intr_mitigation) { imr_reg |= AR_IMR_TXINTM | AR_IMR_TXMINTR; - else + msi_cfg |= AR_INTCFG_MSI_TXINTM | AR_INTCFG_MSI_TXMINTR; + } else { imr_reg |= AR_IMR_TXOK; + msi_cfg |= AR_INTCFG_MSI_TXOK; + } ENABLE_REGWRITE_BUFFER(ah); @@ -952,6 +961,16 @@ static void ath9k_hw_init_interrupt_masks(struct ath_hw *ah, ah->imrs2_reg |= AR_IMR_S2_GTT; REG_WRITE(ah, AR_IMR_S2, ah->imrs2_reg); + if (ah->msi_enabled) { + ah->msi_reg = REG_READ(ah, AR_PCIE_MSI); + ah->msi_reg |= AR_PCIE_MSI_HW_DBI_WR_EN; + ah->msi_reg &= AR_PCIE_MSI_HW_INT_PENDING_ADDR_MSI_64; + REG_WRITE(ah, AR_INTCFG, msi_cfg); + ath_dbg(ath9k_hw_common(ah), ANY, + "value of AR_INTCFG=0x%X, msi_cfg=0x%X\n", + REG_READ(ah, AR_INTCFG), msi_cfg); + } + if (!AR_SREV_9100(ah)) { REG_WRITE(ah, AR_INTR_SYNC_CAUSE, 0xFFFFFFFF); REG_WRITE(ah, AR_INTR_SYNC_ENABLE, sync_default); diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 4ac70827d142..0d6c07c77372 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -977,6 +977,9 @@ struct ath_hw { bool tpc_enabled; u8 tx_power[Ar5416RateSize]; u8 tx_power_stbc[Ar5416RateSize]; + bool msi_enabled; + u32 msi_mask; + u32 msi_reg; }; struct ath_bus_ops { diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index fa58a32227f5..e479fae5aab9 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -23,6 +23,7 @@ #include <linux/of.h> #include <linux/of_net.h> #include <linux/relay.h> +#include <linux/dmi.h> #include <net/ieee80211_radiotap.h> #include "ath9k.h" @@ -75,6 +76,10 @@ MODULE_PARM_DESC(use_chanctx, "Enable channel context for concurrency"); #endif /* CONFIG_ATH9K_CHANNEL_CONTEXT */ +int ath9k_use_msi; +module_param_named(use_msi, ath9k_use_msi, int, 0444); +MODULE_PARM_DESC(use_msi, "Use MSI instead of INTx if possible"); + bool is_ath9k_unloaded; #ifdef CONFIG_MAC80211_LEDS @@ -92,6 +97,56 @@ static const struct ieee80211_tpt_blink ath9k_tpt_blink[] = { }; #endif +static int __init 
set_use_msi(const struct dmi_system_id *dmi) +{ + ath9k_use_msi = 1; + return 1; +} + +static const struct dmi_system_id ath9k_quirks[] __initconst = { + { + .callback = set_use_msi, + .ident = "Dell Inspiron 24-3460", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 24-3460"), + }, + }, + { + .callback = set_use_msi, + .ident = "Dell Vostro 3262", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3262"), + }, + }, + { + .callback = set_use_msi, + .ident = "Dell Inspiron 3472", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 3472"), + }, + }, + { + .callback = set_use_msi, + .ident = "Dell Vostro 15-3572", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15-3572"), + }, + }, + { + .callback = set_use_msi, + .ident = "Dell Inspiron 14-3473", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 14-3473"), + }, + }, + {} +}; + static void ath9k_deinit_softc(struct ath_softc *sc); static void ath9k_op_ps_wakeup(struct ath_common *common) @@ -1100,6 +1155,8 @@ static int __init ath9k_init(void) goto err_pci_exit; } + dmi_check_system(ath9k_quirks); + return 0; err_pci_exit: diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c index 77c94f9e7b61..58d02c19b6d0 100644 --- a/drivers/net/wireless/ath/ath9k/mac.c +++ b/drivers/net/wireless/ath/ath9k/mac.c @@ -832,6 +832,43 @@ static void __ath9k_hw_enable_interrupts(struct ath_hw *ah) } ath_dbg(common, INTERRUPT, "AR_IMR 0x%x IER 0x%x\n", REG_READ(ah, AR_IMR), REG_READ(ah, AR_IER)); + + if (ah->msi_enabled) { + u32 _msi_reg = 0; + u32 i = 0; + u32 msi_pend_addr_mask = AR_PCIE_MSI_HW_INT_PENDING_ADDR_MSI_64; + + ath_dbg(ath9k_hw_common(ah), INTERRUPT, + "Enabling MSI, msi_mask=0x%X\n", ah->msi_mask); + + REG_WRITE(ah, AR_INTR_PRIO_ASYNC_ENABLE, ah->msi_mask); + REG_WRITE(ah, AR_INTR_PRIO_ASYNC_MASK, ah->msi_mask); + ath_dbg(ath9k_hw_common(ah), INTERRUPT, + "AR_INTR_PRIO_ASYNC_ENABLE=0x%X, AR_INTR_PRIO_ASYNC_MASK=0x%X\n", + REG_READ(ah, AR_INTR_PRIO_ASYNC_ENABLE), + REG_READ(ah, AR_INTR_PRIO_ASYNC_MASK)); + + if (ah->msi_reg == 0) + ah->msi_reg = REG_READ(ah, AR_PCIE_MSI); + + ath_dbg(ath9k_hw_common(ah), INTERRUPT, + "AR_PCIE_MSI=0x%X, ah->msi_reg = 0x%X\n", + AR_PCIE_MSI, ah->msi_reg); + + i = 0; + do { + REG_WRITE(ah, AR_PCIE_MSI, + (ah->msi_reg | AR_PCIE_MSI_ENABLE) + & msi_pend_addr_mask); + _msi_reg = REG_READ(ah, AR_PCIE_MSI); + i++; + } while ((_msi_reg & AR_PCIE_MSI_ENABLE) == 0 && i < 200); + + if (i >= 200) + ath_err(ath9k_hw_common(ah), + "%s: _msi_reg = 0x%X\n", + __func__, _msi_reg); + } } void ath9k_hw_resume_interrupts(struct ath_hw *ah) @@ -878,12 +915,21 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah) if (!(ints & ATH9K_INT_GLOBAL)) ath9k_hw_disable_interrupts(ah); + if (ah->msi_enabled) { + ath_dbg(common, INTERRUPT, "Clearing AR_INTR_PRIO_ASYNC_ENABLE\n"); + + REG_WRITE(ah, AR_INTR_PRIO_ASYNC_ENABLE, 0); + REG_READ(ah, AR_INTR_PRIO_ASYNC_ENABLE); + } + ath_dbg(common, INTERRUPT, "New interrupt mask 0x%x\n", ints); mask = ints & ATH9K_INT_COMMON; mask2 = 0; + ah->msi_mask = 0; if (ints & ATH9K_INT_TX) { + ah->msi_mask |= AR_INTR_PRIO_TX; if (ah->config.tx_intr_mitigation) mask |= AR_IMR_TXMINTR | AR_IMR_TXINTM; else { @@ -898,6 +944,7 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah) mask |= AR_IMR_TXEOL; } if (ints & ATH9K_INT_RX) { + ah->msi_mask |= 
AR_INTR_PRIO_RXLP | AR_INTR_PRIO_RXHP; if (AR_SREV_9300_20_OR_LATER(ah)) { mask |= AR_IMR_RXERR | AR_IMR_RXOK_HP; if (ah->config.rx_intr_mitigation) { diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 223606311261..645f0fbd9179 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -22,6 +22,8 @@ #include <linux/module.h> #include "ath9k.h" +extern int ath9k_use_msi; + static const struct pci_device_id ath_pci_id_table[] = { { PCI_VDEVICE(ATHEROS, 0x0023) }, /* PCI */ { PCI_VDEVICE(ATHEROS, 0x0024) }, /* PCI-E */ @@ -889,6 +891,7 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) u32 val; int ret = 0; char hw_name[64]; + int msi_enabled = 0; if (pcim_enable_device(pdev)) return -EIO; @@ -960,7 +963,20 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) sc->mem = pcim_iomap_table(pdev)[0]; sc->driver_data = id->driver_data; - ret = request_irq(pdev->irq, ath_isr, IRQF_SHARED, "ath9k", sc); + if (ath9k_use_msi) { + if (pci_enable_msi(pdev) == 0) { + msi_enabled = 1; + dev_err(&pdev->dev, "Using MSI\n"); + } else { + dev_err(&pdev->dev, "Using INTx\n"); + } + } + + if (!msi_enabled) + ret = request_irq(pdev->irq, ath_isr, IRQF_SHARED, "ath9k", sc); + else + ret = request_irq(pdev->irq, ath_isr, 0, "ath9k", sc); + if (ret) { dev_err(&pdev->dev, "request_irq failed\n"); goto err_irq; @@ -974,6 +990,9 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_init; } + sc->sc_ah->msi_enabled = msi_enabled; + sc->sc_ah->msi_reg = 0; + ath9k_hw_name(sc->sc_ah, hw_name, sizeof(hw_name)); wiphy_info(hw->wiphy, "%s mem=0x%lx, irq=%d\n", hw_name, (unsigned long)sc->mem, pdev->irq); diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h index 80ff69f99229..653e79611830 100644 --- a/drivers/net/wireless/ath/ath9k/reg.h +++ b/drivers/net/wireless/ath/ath9k/reg.h @@ -146,6 +146,14 @@ #define AR_MACMISC_MISC_OBS_BUS_MSB_S 15 #define AR_MACMISC_MISC_OBS_BUS_1 1 +#define AR_INTCFG 0x005C +#define AR_INTCFG_MSI_RXOK 0x00000000 +#define AR_INTCFG_MSI_RXINTM 0x00000004 +#define AR_INTCFG_MSI_RXMINTR 0x00000006 +#define AR_INTCFG_MSI_TXOK 0x00000000 +#define AR_INTCFG_MSI_TXINTM 0x00000010 +#define AR_INTCFG_MSI_TXMINTR 0x00000018 + #define AR_DATABUF_SIZE 0x0060 #define AR_DATABUF_SIZE_MASK 0x00000FFF @@ -1256,6 +1264,13 @@ enum { #define AR_PCIE_MSI (AR_SREV_9340(ah) ? 0x40d8 : \ (AR_SREV_9300_20_OR_LATER(ah) ? 0x40a4 : 0x4094)) #define AR_PCIE_MSI_ENABLE 0x00000001 +#define AR_PCIE_MSI_HW_DBI_WR_EN 0x02000000 +#define AR_PCIE_MSI_HW_INT_PENDING_ADDR 0xFFA0C1FF /* bits 8..11: value must be 0x5060 */ +#define AR_PCIE_MSI_HW_INT_PENDING_ADDR_MSI_64 0xFFA0C9FF /* bits 8..11: value must be 0x5064 */ + +#define AR_INTR_PRIO_TX 0x00000001 +#define AR_INTR_PRIO_RXLP 0x00000002 +#define AR_INTR_PRIO_RXHP 0x00000004 #define AR_INTR_PRIO_SYNC_ENABLE (AR_SREV_9340(ah) ? 0x4088 : 0x40c4) #define AR_INTR_PRIO_ASYNC_MASK (AR_SREV_9340(ah) ? 
0x408c : 0x40c8) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 6a59d0609d30..9be0b051066a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -182,12 +182,9 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) err = request_firmware(&clm, clm_name, dev); if (err) { - if (err == -ENOENT) { - brcmf_dbg(INFO, "continue with CLM data currently present in firmware\n"); - return 0; - } - brcmf_err("request CLM blob file failed (%d)\n", err); - return err; + brcmf_info("no clm_blob available(err=%d), device may have limited channels available\n", + err); + return 0; } chunk_buf = kzalloc(sizeof(*chunk_buf) + MAX_CHUNK_LEN - 1, GFP_KERNEL); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e54255597fac..1cf22e62e3dd 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -32,6 +32,7 @@ #include <net/genetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/rhashtable.h> #include "mac80211_hwsim.h" #define WARN_QUEUE 100 @@ -490,6 +491,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = { static spinlock_t hwsim_radio_lock; static LIST_HEAD(hwsim_radios); static struct workqueue_struct *hwsim_wq; +static struct rhashtable hwsim_radios_rht; static int hwsim_radio_idx; static struct platform_driver mac80211_hwsim_driver = { @@ -500,6 +502,7 @@ static struct platform_driver mac80211_hwsim_driver = { struct mac80211_hwsim_data { struct list_head list; + struct rhash_head rht; struct ieee80211_hw *hw; struct device *dev; struct ieee80211_supported_band bands[NUM_NL80211_BANDS]; @@ -574,6 +577,13 @@ struct mac80211_hwsim_data { u64 tx_failed; }; +static const struct rhashtable_params hwsim_rht_params = { + .nelem_hint = 2, + .automatic_shrinking = true, + .key_len = ETH_ALEN, + .key_offset = offsetof(struct mac80211_hwsim_data, addresses[1]), + .head_offset = offsetof(struct mac80211_hwsim_data, rht), +}; struct hwsim_radiotap_hdr { struct ieee80211_radiotap_header hdr; @@ -1009,6 +1019,36 @@ static int hwsim_unicast_netgroup(struct mac80211_hwsim_data *data, return res; } +static inline u16 trans_tx_rate_flags_ieee2hwsim(struct ieee80211_tx_rate *rate) +{ + u16 result = 0; + + if (rate->flags & IEEE80211_TX_RC_USE_RTS_CTS) + result |= MAC80211_HWSIM_TX_RC_USE_RTS_CTS; + if (rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) + result |= MAC80211_HWSIM_TX_RC_USE_CTS_PROTECT; + if (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) + result |= MAC80211_HWSIM_TX_RC_USE_SHORT_PREAMBLE; + if (rate->flags & IEEE80211_TX_RC_MCS) + result |= MAC80211_HWSIM_TX_RC_MCS; + if (rate->flags & IEEE80211_TX_RC_GREEN_FIELD) + result |= MAC80211_HWSIM_TX_RC_GREEN_FIELD; + if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + result |= MAC80211_HWSIM_TX_RC_40_MHZ_WIDTH; + if (rate->flags & IEEE80211_TX_RC_DUP_DATA) + result |= MAC80211_HWSIM_TX_RC_DUP_DATA; + if (rate->flags & IEEE80211_TX_RC_SHORT_GI) + result |= MAC80211_HWSIM_TX_RC_SHORT_GI; + if (rate->flags & IEEE80211_TX_RC_VHT_MCS) + result |= MAC80211_HWSIM_TX_RC_VHT_MCS; + if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + result |= MAC80211_HWSIM_TX_RC_80_MHZ_WIDTH; + if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + result |= MAC80211_HWSIM_TX_RC_160_MHZ_WIDTH; + + return result; +} + static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw 
*hw, struct sk_buff *my_skb, int dst_portid) @@ -1021,6 +1061,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, unsigned int hwsim_flags = 0; int i; struct hwsim_tx_rate tx_attempts[IEEE80211_TX_MAX_RATES]; + struct hwsim_tx_rate_flag tx_attempts_flags[IEEE80211_TX_MAX_RATES]; uintptr_t cookie; if (data->ps != PS_DISABLED) @@ -1072,7 +1113,11 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { tx_attempts[i].idx = info->status.rates[i].idx; + tx_attempts_flags[i].idx = info->status.rates[i].idx; tx_attempts[i].count = info->status.rates[i].count; + tx_attempts_flags[i].flags = + trans_tx_rate_flags_ieee2hwsim( + &info->status.rates[i]); } if (nla_put(skb, HWSIM_ATTR_TX_INFO, @@ -1080,6 +1125,11 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, tx_attempts)) goto nla_put_failure; + if (nla_put(skb, HWSIM_ATTR_TX_INFO_FLAGS, + sizeof(struct hwsim_tx_rate_flag) * IEEE80211_TX_MAX_RATES, + tx_attempts_flags)) + goto nla_put_failure; + /* We create a cookie to identify this skb */ data->pending_cookie++; cookie = data->pending_cookie; @@ -2732,6 +2782,15 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); spin_lock_bh(&hwsim_radio_lock); + err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht, + hwsim_rht_params); + if (err < 0) { + pr_debug("mac80211_hwsim: radio index %d already present\n", + idx); + spin_unlock_bh(&hwsim_radio_lock); + goto failed_final_insert; + } + list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); @@ -2740,6 +2799,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, return idx; +failed_final_insert: + debugfs_remove_recursive(data->debugfs); + ieee80211_unregister_hw(data->hw); failed_hw: device_release_driver(data->dev); failed_bind: @@ -2875,22 +2937,9 @@ static void hwsim_mon_setup(struct net_device *dev) static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr) { - struct mac80211_hwsim_data *data; - bool _found = false; - - spin_lock_bh(&hwsim_radio_lock); - list_for_each_entry(data, &hwsim_radios, list) { - if (memcmp(data->addresses[1].addr, addr, ETH_ALEN) == 0) { - _found = true; - break; - } - } - spin_unlock_bh(&hwsim_radio_lock); - - if (!_found) - return NULL; - - return data; + return rhashtable_lookup_fast(&hwsim_radios_rht, + addr, + hwsim_rht_params); } static void hwsim_register_wmediumd(struct net *net, u32 portid) @@ -2975,7 +3024,6 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { txi->status.rates[i].idx = tx_attempts[i].idx; txi->status.rates[i].count = tx_attempts[i].count; - /*txi->status.rates[i].flags = 0;*/ } txi->status.ack_signal = nla_get_u32(info->attrs[HWSIM_ATTR_SIGNAL]); @@ -3155,8 +3203,10 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_REG_CUSTOM_REG]) { u32 idx = nla_get_u32(info->attrs[HWSIM_ATTR_REG_CUSTOM_REG]); - if (idx >= ARRAY_SIZE(hwsim_world_regdom_custom)) + if (idx >= ARRAY_SIZE(hwsim_world_regdom_custom)) { + kfree(hwname); return -EINVAL; + } param.regd = hwsim_world_regdom_custom[idx]; } @@ -3197,6 +3247,8 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) continue; list_del(&data->list); + rhashtable_remove_fast(&hwsim_radios_rht, &data->rht, + hwsim_rht_params); spin_unlock_bh(&hwsim_radio_lock); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), info); @@ 
-3352,6 +3404,8 @@ static void remove_user_radios(u32 portid)
 	list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) {
 		if (entry->destroy_on_close && entry->portid == portid) {
 			list_del(&entry->list);
+			rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht,
+					       hwsim_rht_params);
 			INIT_WORK(&entry->destroy_work, destroy_radio);
 			queue_work(hwsim_wq, &entry->destroy_work);
 		}
@@ -3427,6 +3481,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
 			continue;
 
 		list_del(&data->list);
+		rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
+				       hwsim_rht_params);
 		INIT_WORK(&data->destroy_work, destroy_radio);
 		queue_work(hwsim_wq, &data->destroy_work);
 	}
@@ -3463,6 +3519,7 @@ static int __init init_mac80211_hwsim(void)
 	hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0);
 	if (!hwsim_wq)
 		return -ENOMEM;
+	rhashtable_init(&hwsim_radios_rht, &hwsim_rht_params);
 
 	err = register_pernet_device(&hwsim_net_ops);
 	if (err)
@@ -3604,6 +3661,7 @@ static void __exit exit_mac80211_hwsim(void)
 	mac80211_hwsim_free();
 	flush_workqueue(hwsim_wq);
 
+	rhashtable_destroy(&hwsim_radios_rht);
 	unregister_netdev(hwsim_mon);
 	platform_driver_unregister(&mac80211_hwsim_driver);
 	unregister_pernet_device(&hwsim_net_ops);
diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h
index 3f5eda591dba..a96a79c1eff5 100644
--- a/drivers/net/wireless/mac80211_hwsim.h
+++ b/drivers/net/wireless/mac80211_hwsim.h
@@ -64,7 +64,8 @@ enum hwsim_tx_control_flags {
 * @HWSIM_CMD_TX_INFO_FRAME: Transmission info report from user space to
 *	kernel, uses:
 *	%HWSIM_ATTR_ADDR_TRANSMITTER, %HWSIM_ATTR_FLAGS,
- *	%HWSIM_ATTR_TX_INFO, %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
+ *	%HWSIM_ATTR_TX_INFO, %HWSIM_ATTR_TX_INFO_FLAGS,
+ *	%HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
 * @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
 *	returns the radio ID (>= 0) or negative on errors, if successful
 *	then multicast the result
@@ -123,6 +124,8 @@ enum {
 * @HWSIM_ATTR_RADIO_NAME: Name of radio, e.g. phy666
 * @HWSIM_ATTR_NO_VIF: Do not create vif (wlanX) when creating radio.
 * @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
+ * @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
+ *	rates of %HWSIM_ATTR_TX_INFO
 * @__HWSIM_ATTR_MAX: enum limit
 */
@@ -149,6 +152,7 @@ enum {
 	HWSIM_ATTR_NO_VIF,
 	HWSIM_ATTR_FREQ,
 	HWSIM_ATTR_PAD,
+	HWSIM_ATTR_TX_INFO_FLAGS,
 	__HWSIM_ATTR_MAX,
 };
 #define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)
@@ -171,4 +175,66 @@ struct hwsim_tx_rate {
 	u8 count;
 } __packed;
 
+/**
+ * enum hwsim_tx_rate_flags - per-rate flags set by the rate control algorithm.
+ *	Inspired by structure mac80211_rate_control_flags. New flags may be
+ *	appended, but old flags not deleted, to keep compatibility for
+ *	userspace.
+ *
+ * These flags are set by the Rate control algorithm for each rate during tx,
+ * in the @flags member of struct ieee80211_tx_rate.
+ *
+ * @MAC80211_HWSIM_TX_RC_USE_RTS_CTS: Use RTS/CTS exchange for this rate.
+ * @MAC80211_HWSIM_TX_RC_USE_CTS_PROTECT: CTS-to-self protection is required.
+ *	This is set if the current BSS requires ERP protection.
+ * @MAC80211_HWSIM_TX_RC_USE_SHORT_PREAMBLE: Use short preamble.
+ * @MAC80211_HWSIM_TX_RC_MCS: HT rate.
+ * @MAC80211_HWSIM_TX_RC_VHT_MCS: VHT MCS rate, in this case the idx field is
+ *	split into a higher 4 bits (Nss) and lower 4 bits (MCS number)
+ * @MAC80211_HWSIM_TX_RC_GREEN_FIELD: Indicates whether this rate should be used
+ *	in Greenfield mode.
+ * @MAC80211_HWSIM_TX_RC_40_MHZ_WIDTH: Indicates if the Channel Width should be
+ *	40 MHz.
+ * @MAC80211_HWSIM_TX_RC_80_MHZ_WIDTH: Indicates 80 MHz transmission
+ * @MAC80211_HWSIM_TX_RC_160_MHZ_WIDTH: Indicates 160 MHz transmission
+ *	(80+80 isn't supported yet)
+ * @MAC80211_HWSIM_TX_RC_DUP_DATA: The frame should be transmitted on both of
+ *	the adjacent 20 MHz channels, if the current channel type is
+ *	NL80211_CHAN_HT40MINUS or NL80211_CHAN_HT40PLUS.
+ * @MAC80211_HWSIM_TX_RC_SHORT_GI: Short Guard interval should be used for this
+ *	rate.
+ */
+enum hwsim_tx_rate_flags {
+	MAC80211_HWSIM_TX_RC_USE_RTS_CTS = BIT(0),
+	MAC80211_HWSIM_TX_RC_USE_CTS_PROTECT = BIT(1),
+	MAC80211_HWSIM_TX_RC_USE_SHORT_PREAMBLE = BIT(2),
+
+	/* rate index is an HT/VHT MCS instead of an index */
+	MAC80211_HWSIM_TX_RC_MCS = BIT(3),
+	MAC80211_HWSIM_TX_RC_GREEN_FIELD = BIT(4),
+	MAC80211_HWSIM_TX_RC_40_MHZ_WIDTH = BIT(5),
+	MAC80211_HWSIM_TX_RC_DUP_DATA = BIT(6),
+	MAC80211_HWSIM_TX_RC_SHORT_GI = BIT(7),
+	MAC80211_HWSIM_TX_RC_VHT_MCS = BIT(8),
+	MAC80211_HWSIM_TX_RC_80_MHZ_WIDTH = BIT(9),
+	MAC80211_HWSIM_TX_RC_160_MHZ_WIDTH = BIT(10),
+};
+
+/**
+ * struct hwsim_tx_rate_flag - per-rate flag selection/status
+ *
+ * @idx: rate index to attempt to send with
+ * @flags: the rate control flags (&enum hwsim_tx_rate_flags) for this attempt
+ *
+ * A value of -1 for @idx indicates an invalid rate and, if used
+ * in an array of retry rates, that no more rates should be tried.
+ *
+ * When used for transmit status reporting, the driver should
+ * always report the rate and the flags used.
+ *
+ */
+struct hwsim_tx_rate_flag {
+	s8 idx;
+	u16 flags;
+} __packed;
 #endif /* __MAC80211_HWSIM_H */
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index 23209c5cab05..97a6199692ab 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -310,8 +310,6 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev)
 			mwifiex_init_shutdown_fw(priv, MWIFIEX_FUNC_SHUTDOWN);
 	}
 
-	cancel_work_sync(&card->work);
-
 	mwifiex_remove_card(adapter);
 }
 
@@ -2788,7 +2786,10 @@ static void mwifiex_pcie_card_reset_work(struct mwifiex_adapter *adapter)
 {
 	struct pcie_service_card *card = adapter->card;
 
-	pci_reset_function(card->dev);
+	/* We can't afford to wait here; remove() might be waiting on us. If we
+	 * can't grab the device lock, maybe we'll get another chance later.
+ */ + pci_try_reset_function(card->dev); } static void mwifiex_pcie_work(struct work_struct *work) diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index 248858723753..a82880132af4 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -399,8 +399,6 @@ mwifiex_sdio_remove(struct sdio_func *func) mwifiex_init_shutdown_fw(priv, MWIFIEX_FUNC_SHUTDOWN); } - cancel_work_sync(&card->work); - mwifiex_remove_card(adapter); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c index ecc23f58da98..f7c0df0759f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c @@ -186,7 +186,7 @@ void mt76x2_mac_write_txwi(struct mt76x2_dev *dev, struct mt76x2_txwi *txwi, txwi->pktid = 1; spin_lock_bh(&dev->mt76.lock); - if (rate->idx < 0 || !rate->count) { + if (wcid && (rate->idx < 0 || !rate->count)) { txwi->rate = wcid->tx_rate; max_txpwr_adj = wcid->max_txpwr_adj; nss = wcid->tx_rate_nss; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c index 963aea9e8801..79915cbee3f0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c @@ -465,13 +465,15 @@ mt76x2_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct mt76x2_dev *dev = hw->priv; struct mt76x2_sta *msta = (struct mt76x2_sta *) sta->drv_priv; struct ieee80211_txq *txq = sta->txq[params->tid]; - struct mt76_txq *mtxq = (struct mt76_txq *) txq->drv_priv; u16 tid = params->tid; u16 *ssn = ¶ms->ssn; + struct mt76_txq *mtxq; if (!txq) return -EINVAL; + mtxq = (struct mt76_txq *)txq->drv_priv; + switch (action) { case IEEE80211_AMPDU_RX_START: mt76_set(dev, MT_WCID_ADDR(msta->wcid.idx) + 4, BIT(16 + tid)); diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 0ba9c0cc95e1..c8ebf738cb2b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -395,6 +395,7 @@ static void _rtl_init_mac80211(struct ieee80211_hw *hw) ieee80211_hw_set(hw, CONNECTION_MONITOR); ieee80211_hw_set(hw, MFP_CAPABLE); ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, SUPPORTS_AMSDU_IN_AMPDU); /* swlps or hwlps has been set in diff chip in init_sw_vars */ if (rtlpriv->psc.swctrl_lps) { @@ -551,7 +552,8 @@ int rtl_init_core(struct ieee80211_hw *hw) /* <4> locks */ mutex_init(&rtlpriv->locks.conf_mutex); - spin_lock_init(&rtlpriv->locks.ips_lock); + mutex_init(&rtlpriv->locks.ips_mutex); + mutex_init(&rtlpriv->locks.lps_mutex); spin_lock_init(&rtlpriv->locks.irq_th_lock); spin_lock_init(&rtlpriv->locks.h2c_lock); spin_lock_init(&rtlpriv->locks.rf_ps_lock); @@ -561,9 +563,7 @@ int rtl_init_core(struct ieee80211_hw *hw) spin_lock_init(&rtlpriv->locks.c2hcmd_lock); spin_lock_init(&rtlpriv->locks.scan_list_lock); spin_lock_init(&rtlpriv->locks.cck_and_rw_pagea_lock); - spin_lock_init(&rtlpriv->locks.check_sendpkt_lock); spin_lock_init(&rtlpriv->locks.fw_ps_lock); - spin_lock_init(&rtlpriv->locks.lps_lock); spin_lock_init(&rtlpriv->locks.iqk_lock); /* <5> init list */ INIT_LIST_HEAD(&rtlpriv->entry_list); @@ -1229,7 +1229,6 @@ bool rtl_tx_mgmt_proc(struct ieee80211_hw *hw, struct sk_buff *skb) } if (ieee80211_is_auth(fc)) { RT_TRACE(rtlpriv, COMP_SEND, DBG_DMESG, "MAC80211_LINKING\n"); - rtl_ips_nic_on(hw); 
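+		/* rtl_ips_nic_on() may sleep now that IPS is serialized by
+		 * ips_mutex (see the lock conversion in rtl_init_core()
+		 * above), so it presumably can no longer be called from
+		 * this transmit path.
+		 */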
mac->link_state = MAC80211_LINKING; /* Dul mac */ diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c index 5f3eda31187a..af8f3778dc91 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c @@ -25,14 +25,6 @@ #include "halbt_precomp.h" -/*********************************************** - * Global variables - ***********************************************/ - -struct btc_coexist gl_bt_coexist; - -u32 btc_dbg_type[BTC_MSG_MAX]; - /*************************************************** * Debug related function ***************************************************/ @@ -215,6 +207,110 @@ u8 rtl_get_hwpg_package_type(struct rtl_priv *rtlpriv) return rtlhal->package_type; } +static +u8 rtl_get_hwpg_rfe_type(struct rtl_priv *rtlpriv) +{ + struct rtl_hal *rtlhal = rtl_hal(rtlpriv); + + return rtlhal->rfe_type; +} + +static +bool halbtc_is_hw_mailbox_exist(struct btc_coexist *btcoexist) +{ + if (IS_HARDWARE_TYPE_8812(btcoexist->adapter)) + return false; + else + return true; +} + +static +bool halbtc_send_bt_mp_operation(struct btc_coexist *btcoexist, u8 op_code, + u8 *cmd, u32 len, unsigned long wait_ms) +{ + struct rtl_priv *rtlpriv; + const u8 oper_ver = 0; + u8 req_num; + + if (!halbtc_is_hw_mailbox_exist(btcoexist)) + return false; + + if (wait_ms) /* before h2c to avoid race condition */ + reinit_completion(&btcoexist->bt_mp_comp); + + rtlpriv = btcoexist->adapter; + + /* fill req_num by op_code, and rtl_btc_btmpinfo_notify() use it + * to know message type + */ + switch (op_code) { + case BT_OP_GET_BT_VERSION: + req_num = BT_SEQ_GET_BT_VERSION; + break; + case BT_OP_GET_AFH_MAP_L: + req_num = BT_SEQ_GET_AFH_MAP_L; + break; + case BT_OP_GET_AFH_MAP_M: + req_num = BT_SEQ_GET_AFH_MAP_M; + break; + case BT_OP_GET_AFH_MAP_H: + req_num = BT_SEQ_GET_AFH_MAP_H; + break; + case BT_OP_GET_BT_COEX_SUPPORTED_FEATURE: + req_num = BT_SEQ_GET_BT_COEX_SUPPORTED_FEATURE; + break; + case BT_OP_GET_BT_COEX_SUPPORTED_VERSION: + req_num = BT_SEQ_GET_BT_COEX_SUPPORTED_VERSION; + break; + case BT_OP_GET_BT_ANT_DET_VAL: + req_num = BT_SEQ_GET_BT_ANT_DET_VAL; + break; + case BT_OP_GET_BT_BLE_SCAN_PARA: + req_num = BT_SEQ_GET_BT_BLE_SCAN_PARA; + break; + case BT_OP_GET_BT_BLE_SCAN_TYPE: + req_num = BT_SEQ_GET_BT_BLE_SCAN_TYPE; + break; + case BT_OP_GET_BT_DEVICE_INFO: + req_num = BT_SEQ_GET_BT_DEVICE_INFO; + break; + case BT_OP_GET_BT_FORBIDDEN_SLOT_VAL: + req_num = BT_SEQ_GET_BT_FORB_SLOT_VAL; + break; + case BT_OP_WRITE_REG_ADDR: + case BT_OP_WRITE_REG_VALUE: + case BT_OP_READ_REG: + default: + req_num = BT_SEQ_DONT_CARE; + break; + } + + cmd[0] |= (oper_ver & 0x0f); /* Set OperVer */ + cmd[0] |= ((req_num << 4) & 0xf0); /* Set ReqNum */ + cmd[1] = op_code; + rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, 0x67, len, cmd); + + /* wait? 
*/ + if (!wait_ms) + return true; + + RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD, + "btmpinfo wait req_num=%d wait=%ld\n", req_num, wait_ms); + + if (in_interrupt()) + return false; + + if (wait_for_completion_timeout(&btcoexist->bt_mp_comp, + msecs_to_jiffies(wait_ms)) == 0) { + RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_DMESG, + "btmpinfo wait (req_num=%d) timeout\n", req_num); + + return false; /* timeout */ + } + + return true; +} + static void halbtc_leave_lps(struct btc_coexist *btcoexist) { struct rtl_priv *rtlpriv; @@ -342,24 +438,79 @@ static void halbtc_aggregation_check(struct btc_coexist *btcoexist) static u32 halbtc_get_bt_patch_version(struct btc_coexist *btcoexist) { - struct rtl_priv *rtlpriv = btcoexist->adapter; u8 cmd_buffer[4] = {0}; - u8 oper_ver = 0; - u8 req_num = 0x0E; if (btcoexist->bt_info.bt_real_fw_ver) goto label_done; - cmd_buffer[0] |= (oper_ver & 0x0f); /* Set OperVer */ - cmd_buffer[0] |= ((req_num << 4) & 0xf0); /* Set ReqNum */ - cmd_buffer[1] = 0; /* BT_OP_GET_BT_VERSION = 0 */ - rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, 0x67, 4, - &cmd_buffer[0]); + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_BT_VERSION, + cmd_buffer, 4, 200); label_done: return btcoexist->bt_info.bt_real_fw_ver; } +static u32 halbtc_get_bt_coex_supported_feature(void *btc_context) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + if (btcoexist->bt_info.bt_supported_feature) + goto label_done; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, + BT_OP_GET_BT_COEX_SUPPORTED_FEATURE, + cmd_buffer, 4, 200); + +label_done: + return btcoexist->bt_info.bt_supported_feature; +} + +static u32 halbtc_get_bt_coex_supported_version(void *btc_context) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + if (btcoexist->bt_info.bt_supported_version) + goto label_done; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, + BT_OP_GET_BT_COEX_SUPPORTED_VERSION, + cmd_buffer, 4, 200); + +label_done: + return btcoexist->bt_info.bt_supported_version; +} + +static u32 halbtc_get_bt_device_info(void *btc_context) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, + BT_OP_GET_BT_DEVICE_INFO, + cmd_buffer, 4, 200); + + return btcoexist->bt_info.bt_device_info; +} + +static u32 halbtc_get_bt_forbidden_slot_val(void *btc_context) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, + BT_OP_GET_BT_FORBIDDEN_SLOT_VAL, + cmd_buffer, 4, 200); + + return btcoexist->bt_info.bt_forb_slot_val; +} + u32 halbtc_get_wifi_link_status(struct btc_coexist *btcoexist) { /* return value: @@ -521,6 +672,18 @@ static bool halbtc_get(void *void_btcoexist, u8 get_type, void *out_buf) case BTC_GET_U4_VENDOR: *u32_tmp = BTC_VENDOR_OTHER; break; + case BTC_GET_U4_SUPPORTED_VERSION: + *u32_tmp = halbtc_get_bt_coex_supported_version(btcoexist); + break; + case BTC_GET_U4_SUPPORTED_FEATURE: + *u32_tmp = halbtc_get_bt_coex_supported_feature(btcoexist); + break; + case BTC_GET_U4_BT_DEVICE_INFO: + *u32_tmp = 
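/*
 * halbtc_send_bt_mp_operation() and the wrappers around it share one
 * request/response idiom built on a struct completion: re-arm the
 * completion *before* the H2C command is queued (so a fast reply cannot
 * be lost), refuse to wait in interrupt context where sleeping is
 * illegal, then block with a timeout while rtl_btc_btmpinfo_notify()
 * stores the reply and calls complete().  A condensed, self-contained
 * sketch of the idiom; mp_ctx, mp_request and mp_reply are illustrative
 * names, not the driver's:
 */
	#include <linux/completion.h>
	#include <linux/errno.h>
	#include <linux/jiffies.h>
	#include <linux/preempt.h>
	#include <linux/types.h>

	struct mp_ctx {
		struct completion reply;
		u32 value;			/* filled by the reply handler */
	};

	static int mp_request(struct mp_ctx *ctx, void (*send_h2c)(void))
	{
		if (in_interrupt())		/* cannot sleep here */
			return -EWOULDBLOCK;
		reinit_completion(&ctx->reply);	/* re-arm before sending */
		send_h2c();
		if (!wait_for_completion_timeout(&ctx->reply,
						 msecs_to_jiffies(200)))
			return -ETIMEDOUT;	/* firmware never answered */
		return 0;
	}

	/* called from the C2H receive path */
	static void mp_reply(struct mp_ctx *ctx, u32 value)
	{
		ctx->value = value;
		complete(&ctx->reply);
	}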
halbtc_get_bt_device_info(btcoexist); + break; + case BTC_GET_U4_BT_FORBIDDEN_SLOT_VAL: + *u32_tmp = halbtc_get_bt_forbidden_slot_val(btcoexist); + break; case BTC_GET_U1_WIFI_DOT11_CHNL: *u8_tmp = rtlphy->current_channel; break; @@ -677,7 +840,7 @@ static void halbtc_display_wifi_status(struct btc_coexist *btcoexist, u32 wifi_link_status = 0x0; bool bt_hs_on = false, under_ips = false, under_lps = false; bool low_power = false, dc_mode = false; - u8 wifi_chnl = 0, wifi_hs_chnl = 0, fw_ps_state; + u8 wifi_chnl = 0, wifi_hs_chnl = 0; u8 ap_num = 0; wifi_link_status = halbtc_get_wifi_link_status(btcoexist); @@ -733,7 +896,6 @@ static void halbtc_display_wifi_status(struct btc_coexist *btcoexist, dc_mode = true; /*TODO*/ under_ips = rtlpriv->psc.inactive_pwrstate == ERFOFF ? 1 : 0; under_lps = rtlpriv->psc.dot11_psmode == EACTIVE ? 0 : 1; - fw_ps_state = 0; low_power = 0; /*TODO*/ seq_printf(m, "\n %-35s = %s%s%s%s", "Power Status", @@ -902,32 +1064,20 @@ static void halbtc_fill_h2c_cmd(void *bt_context, u8 element_id, void halbtc_set_bt_reg(void *btc_context, u8 reg_type, u32 offset, u32 set_val) { struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; - struct rtl_priv *rtlpriv = btcoexist->adapter; u8 cmd_buffer1[4] = {0}; u8 cmd_buffer2[4] = {0}; - u8 *addr_to_set = (u8 *)&offset; - u8 *value_to_set = (u8 *)&set_val; - u8 oper_ver = 0; - u8 req_num = 0; - if (IS_HARDWARE_TYPE_8723B(btcoexist->adapter)) { - cmd_buffer1[0] |= (oper_ver & 0x0f); /* Set OperVer */ - cmd_buffer1[0] |= ((req_num << 4) & 0xf0); /* Set ReqNum */ - cmd_buffer1[1] = 0x0d; /* OpCode: BT_LO_OP_WRITE_REG_VALUE */ - cmd_buffer1[2] = value_to_set[0]; /* Set WriteRegValue */ - rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, 0x67, 4, - &cmd_buffer1[0]); - - msleep(200); - req_num++; - - cmd_buffer2[0] |= (oper_ver & 0x0f); /* Set OperVer */ - cmd_buffer2[0] |= ((req_num << 4) & 0xf0); /* Set ReqNum */ - cmd_buffer2[1] = 0x0c; /* OpCode: BT_LO_OP_WRITE_REG_ADDR */ - cmd_buffer2[3] = addr_to_set[0]; /* Set WriteRegAddr */ - rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, 0x67, 4, - &cmd_buffer2[0]); - } + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + *((__le16 *)&cmd_buffer1[2]) = cpu_to_le16((u16)set_val); + if (!halbtc_send_bt_mp_operation(btcoexist, BT_OP_WRITE_REG_VALUE, + cmd_buffer1, 4, 200)) + return; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + cmd_buffer2[2] = reg_type; + *((u8 *)&cmd_buffer2[3]) = (u8)offset; + halbtc_send_bt_mp_operation(btcoexist, BT_OP_WRITE_REG_ADDR, + cmd_buffer2, 4, 200); } static void halbtc_display_dbg_msg(void *bt_context, u8 disp_type, @@ -968,12 +1118,95 @@ bool halbtc_under_ips(struct btc_coexist *btcoexist) return false; } +static u8 halbtc_get_ant_det_val_from_bt(void *btc_context) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_BT_ANT_DET_VAL, + cmd_buffer, 4, 200); + + /* need wait completion to return correct value */ + + return btcoexist->bt_info.bt_ant_det_val; +} + +static u8 halbtc_get_ble_scan_type_from_bt(void *btc_context) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_BT_BLE_SCAN_TYPE, + cmd_buffer, 4, 200); + + /* need wait completion to return 
correct value */ + + return btcoexist->bt_info.bt_ble_scan_type; +} + +static u32 halbtc_get_ble_scan_para_from_bt(void *btc_context, u8 scan_type) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[4] = {0}; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_BT_BLE_SCAN_PARA, + cmd_buffer, 4, 200); + + /* need wait completion to return correct value */ + + return btcoexist->bt_info.bt_ble_scan_para; +} + +static bool halbtc_get_bt_afh_map_from_bt(void *btc_context, u8 map_type, + u8 *afh_map) +{ + struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context; + u8 cmd_buffer[2] = {0}; + bool ret; + u32 *afh_map_l = (u32 *)afh_map; + u32 *afh_map_m = (u32 *)(afh_map + 4); + u16 *afh_map_h = (u16 *)(afh_map + 8); + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + ret = halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_AFH_MAP_L, + cmd_buffer, 2, 200); + if (!ret) + goto exit; + + *afh_map_l = btcoexist->bt_info.afh_map_l; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + ret = halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_AFH_MAP_M, + cmd_buffer, 2, 200); + if (!ret) + goto exit; + + *afh_map_m = btcoexist->bt_info.afh_map_m; + + /* cmd_buffer[0] and [1] is filled by halbtc_send_bt_mp_operation() */ + ret = halbtc_send_bt_mp_operation(btcoexist, BT_OP_GET_AFH_MAP_H, + cmd_buffer, 2, 200); + if (!ret) + goto exit; + + *afh_map_h = btcoexist->bt_info.afh_map_h; + +exit: + return ret; +} + /***************************************************************** * Extern functions called by other module *****************************************************************/ -bool exhalbtc_initlize_variables(void) +bool exhalbtc_initlize_variables(struct rtl_priv *rtlpriv) { - struct btc_coexist *btcoexist = &gl_bt_coexist; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return false; halbtc_dbg_init(); @@ -999,19 +1232,36 @@ bool exhalbtc_initlize_variables(void) btcoexist->btc_set = halbtc_set; btcoexist->btc_set_bt_reg = halbtc_set_bt_reg; - btcoexist->bt_info.bt_ctrl_buf_size = false; btcoexist->bt_info.agg_buf_size = 5; btcoexist->bt_info.increase_scan_dev_num = false; + + btcoexist->btc_get_bt_coex_supported_feature = + halbtc_get_bt_coex_supported_feature; + btcoexist->btc_get_bt_coex_supported_version = + halbtc_get_bt_coex_supported_version; + btcoexist->btc_get_ant_det_val_from_bt = halbtc_get_ant_det_val_from_bt; + btcoexist->btc_get_ble_scan_type_from_bt = + halbtc_get_ble_scan_type_from_bt; + btcoexist->btc_get_ble_scan_para_from_bt = + halbtc_get_ble_scan_para_from_bt; + btcoexist->btc_get_bt_afh_map_from_bt = + halbtc_get_bt_afh_map_from_bt; + + init_completion(&btcoexist->bt_mp_comp); + return true; } bool exhalbtc_bind_bt_coex_withadapter(void *adapter) { - struct btc_coexist *btcoexist = &gl_bt_coexist; struct rtl_priv *rtlpriv = adapter; - u8 ant_num = 2, chip_type; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + u8 ant_num = 2, chip_type, single_ant_path = 0; + + if (!btcoexist) + return false; if (btcoexist->binded) return false; @@ -1042,10 +1292,16 @@ bool exhalbtc_bind_bt_coex_withadapter(void *adapter) btcoexist->bt_info.miracast_plus_bt = false; chip_type = rtl_get_hwpg_bt_type(rtlpriv); - exhalbtc_set_chip_type(chip_type); + exhalbtc_set_chip_type(btcoexist, chip_type); ant_num = rtl_get_hwpg_ant_num(rtlpriv); exhalbtc_set_ant_num(rtlpriv, BT_COEX_ANT_TYPE_PG, 
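/*
 * halbtc_get_bt_afh_map_from_bt() above reassembles Bluetooth's AFH
 * channel map (79 channels, so the map fits in 10 bytes) from three
 * separate firmware replies stored back to back in the caller's buffer:
 *
 *   afh_map[0..3] <- BT_OP_GET_AFH_MAP_L (u32)
 *   afh_map[4..7] <- BT_OP_GET_AFH_MAP_M (u32)
 *   afh_map[8..9] <- BT_OP_GET_AFH_MAP_H (u16)
 *
 * The u32/u16 pointer stores assume a sufficiently large and suitably
 * aligned buffer; an alignment-safe equivalent would use memcpy
 * (sketch only, afh_store is a hypothetical helper):
 */
	#include <linux/string.h>
	#include <linux/types.h>

	static void afh_store(u8 afh_map[10], u32 l, u32 m, u16 h)
	{
		memcpy(afh_map + 0, &l, sizeof(l));
		memcpy(afh_map + 4, &m, sizeof(m));
		memcpy(afh_map + 8, &h, sizeof(h));
	}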
ant_num); + /* set default antenna position to main port */ + btcoexist->board_info.btdm_ant_pos = BTC_ANTENNA_AT_MAIN_PORT; + + single_ant_path = rtl_get_hwpg_single_ant_path(rtlpriv); + exhalbtc_set_single_ant_path(btcoexist, single_ant_path); + if (rtl_get_hwpg_package_type(rtlpriv) == 0) btcoexist->board_info.tfbga_package = false; else if (rtl_get_hwpg_package_type(rtlpriv) == 1) @@ -1060,6 +1316,8 @@ bool exhalbtc_bind_bt_coex_withadapter(void *adapter) RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD, "[BTCoex], Package Type = Non-TFBGA\n"); + btcoexist->board_info.rfe_type = rtl_get_hwpg_rfe_type(rtlpriv); + return true; } @@ -1550,30 +1808,25 @@ void exhalbtc_stack_update_profile_info(void) { } -void exhalbtc_update_min_bt_rssi(s8 bt_rssi) +void exhalbtc_update_min_bt_rssi(struct btc_coexist *btcoexist, s8 bt_rssi) { - struct btc_coexist *btcoexist = &gl_bt_coexist; - if (!halbtc_is_bt_coexist_available(btcoexist)) return; btcoexist->stack_info.min_bt_rssi = bt_rssi; } -void exhalbtc_set_hci_version(u16 hci_version) +void exhalbtc_set_hci_version(struct btc_coexist *btcoexist, u16 hci_version) { - struct btc_coexist *btcoexist = &gl_bt_coexist; - if (!halbtc_is_bt_coexist_available(btcoexist)) return; btcoexist->stack_info.hci_version = hci_version; } -void exhalbtc_set_bt_patch_version(u16 bt_hci_version, u16 bt_patch_version) +void exhalbtc_set_bt_patch_version(struct btc_coexist *btcoexist, + u16 bt_hci_version, u16 bt_patch_version) { - struct btc_coexist *btcoexist = &gl_bt_coexist; - if (!halbtc_is_bt_coexist_available(btcoexist)) return; @@ -1581,7 +1834,7 @@ void exhalbtc_set_bt_patch_version(u16 bt_hci_version, u16 bt_patch_version) btcoexist->bt_info.bt_hci_ver = bt_hci_version; } -void exhalbtc_set_chip_type(u8 chip_type) +void exhalbtc_set_chip_type(struct btc_coexist *btcoexist, u8 chip_type) { switch (chip_type) { default: @@ -1589,48 +1842,54 @@ void exhalbtc_set_chip_type(u8 chip_type) case BT_ISSC_3WIRE: case BT_ACCEL: case BT_RTL8756: - gl_bt_coexist.board_info.bt_chip_type = BTC_CHIP_UNDEF; + btcoexist->board_info.bt_chip_type = BTC_CHIP_UNDEF; break; case BT_CSR_BC4: - gl_bt_coexist.board_info.bt_chip_type = BTC_CHIP_CSR_BC4; + btcoexist->board_info.bt_chip_type = BTC_CHIP_CSR_BC4; break; case BT_CSR_BC8: - gl_bt_coexist.board_info.bt_chip_type = BTC_CHIP_CSR_BC8; + btcoexist->board_info.bt_chip_type = BTC_CHIP_CSR_BC8; break; case BT_RTL8723A: - gl_bt_coexist.board_info.bt_chip_type = BTC_CHIP_RTL8723A; + btcoexist->board_info.bt_chip_type = BTC_CHIP_RTL8723A; break; case BT_RTL8821A: - gl_bt_coexist.board_info.bt_chip_type = BTC_CHIP_RTL8821; + btcoexist->board_info.bt_chip_type = BTC_CHIP_RTL8821; break; case BT_RTL8723B: - gl_bt_coexist.board_info.bt_chip_type = BTC_CHIP_RTL8723B; + btcoexist->board_info.bt_chip_type = BTC_CHIP_RTL8723B; break; } } void exhalbtc_set_ant_num(struct rtl_priv *rtlpriv, u8 type, u8 ant_num) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + if (BT_COEX_ANT_TYPE_PG == type) { - gl_bt_coexist.board_info.pg_ant_num = ant_num; - gl_bt_coexist.board_info.btdm_ant_num = ant_num; + btcoexist->board_info.pg_ant_num = ant_num; + btcoexist->board_info.btdm_ant_num = ant_num; } else if (BT_COEX_ANT_TYPE_ANTDIV == type) { - gl_bt_coexist.board_info.btdm_ant_num = ant_num; + btcoexist->board_info.btdm_ant_num = ant_num; } else if (type == BT_COEX_ANT_TYPE_DETECTED) { - gl_bt_coexist.board_info.btdm_ant_num = ant_num; + btcoexist->board_info.btdm_ant_num = ant_num; if (rtlpriv->cfg->mod_params->ant_sel 
== 1) - gl_bt_coexist.board_info.btdm_ant_pos = + btcoexist->board_info.btdm_ant_pos = BTC_ANTENNA_AT_AUX_PORT; else - gl_bt_coexist.board_info.btdm_ant_pos = + btcoexist->board_info.btdm_ant_pos = BTC_ANTENNA_AT_MAIN_PORT; } } /* Currently used by 8723b only, S0 or S1 */ -void exhalbtc_set_single_ant_path(u8 single_ant_path) +void exhalbtc_set_single_ant_path(struct btc_coexist *btcoexist, + u8 single_ant_path) { - gl_bt_coexist.board_info.single_ant_path = single_ant_path; + btcoexist->board_info.single_ant_path = single_ant_path; } void exhalbtc_display_bt_coex_info(struct btc_coexist *btcoexist, diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h index ea12b9d63a73..57caaf130a46 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h @@ -103,8 +103,6 @@ enum btc_msg_type { BTC_MSG_MAX }; -extern u32 btc_dbg_type[]; - /* following is for BTC_MSG_INTERFACE */ #define INTF_INIT BIT0 #define INTF_NOTIFY BIT2 @@ -153,6 +151,8 @@ struct btc_board_info { u8 btdm_ant_pos; u8 single_ant_path; /* current used for 8723b only, 1=>s0, 0=>s1 */ bool tfbga_package; + + u8 rfe_type; }; enum btc_dbg_opcode { @@ -280,6 +280,8 @@ enum btc_get_type { BTC_GET_U4_VENDOR, BTC_GET_U4_SUPPORTED_VERSION, BTC_GET_U4_SUPPORTED_FEATURE, + BTC_GET_U4_BT_DEVICE_INFO, + BTC_GET_U4_BT_FORBIDDEN_SLOT_VAL, BTC_GET_U4_WIFI_IQK_TOTAL, BTC_GET_U4_WIFI_IQK_OK, BTC_GET_U4_WIFI_IQK_FAIL, @@ -423,50 +425,6 @@ enum btc_notify_type_stack_operation { BTC_STACK_OP_MAX }; -typedef u8 (*bfp_btc_r1)(void *btc_context, u32 reg_addr); - -typedef u16 (*bfp_btc_r2)(void *btc_context, u32 reg_addr); - -typedef u32 (*bfp_btc_r4)(void *btc_context, u32 reg_addr); - -typedef void (*bfp_btc_w1)(void *btc_context, u32 reg_addr, u32 data); - -typedef void (*bfp_btc_w1_bit_mak)(void *btc_context, u32 reg_addr, - u32 bit_mask, u8 data1b); - -typedef void (*bfp_btc_w2)(void *btc_context, u32 reg_addr, u16 data); - -typedef void (*bfp_btc_w4)(void *btc_context, u32 reg_addr, u32 data); - -typedef void (*bfp_btc_local_reg_w1)(void *btc_context, u32 reg_addr, u8 data); -typedef void (*bfp_btc_wr_1byte_bit_mask)(void *btc_context, u32 reg_addr, - u8 bit_mask, u8 data); - -typedef void (*bfp_btc_set_bb_reg)(void *btc_context, u32 reg_addr, - u32 bit_mask, u32 data); - -typedef u32 (*bfp_btc_get_bb_reg)(void *btc_context, u32 reg_addr, - u32 bit_mask); - -typedef void (*bfp_btc_set_rf_reg)(void *btc_context, u8 rf_path, u32 reg_addr, - u32 bit_mask, u32 data); - -typedef u32 (*bfp_btc_get_rf_reg)(void *btc_context, u8 rf_path, - u32 reg_addr, u32 bit_mask); - -typedef void (*bfp_btc_fill_h2c)(void *btc_context, u8 element_id, - u32 cmd_len, u8 *cmd_buffer); - -typedef bool (*bfp_btc_get)(void *btcoexist, u8 get_type, void *out_buf); - -typedef bool (*bfp_btc_set)(void *btcoexist, u8 set_type, void *in_buf); - -typedef void (*bfp_btc_set_bt_reg)(void *btc_context, u8 reg_type, u32 offset, - u32 value); - -typedef void (*bfp_btc_disp_dbg_msg)(void *btcoexist, u8 disp_type, - struct seq_file *m); - struct btc_bt_info { bool bt_disabled; u8 rssi_adjust_for_agc_table_on; @@ -498,6 +456,17 @@ struct btc_bt_info { u8 lps_val; u8 rpwm_val; u32 ra_mask; + + u32 afh_map_l; + u32 afh_map_m; + u16 afh_map_h; + u32 bt_supported_feature; + u32 bt_supported_version; + u32 bt_device_info; + u32 bt_forb_slot_val; + u8 bt_ant_det_val; + u8 bt_ble_scan_type; + u32 bt_ble_scan_para; }; struct 
btc_stack_info { @@ -553,6 +522,40 @@ enum btc_antenna_pos { BTC_ANTENNA_AT_AUX_PORT = 0x2, }; +enum btc_mp_h2c_op_code { + BT_OP_GET_BT_VERSION = 0, + BT_OP_WRITE_REG_ADDR = 12, + BT_OP_WRITE_REG_VALUE = 13, + BT_OP_READ_REG = 17, + BT_OP_GET_AFH_MAP_L = 30, + BT_OP_GET_AFH_MAP_M = 31, + BT_OP_GET_AFH_MAP_H = 32, + BT_OP_GET_BT_COEX_SUPPORTED_FEATURE = 42, + BT_OP_GET_BT_COEX_SUPPORTED_VERSION = 43, + BT_OP_GET_BT_ANT_DET_VAL = 44, + BT_OP_GET_BT_BLE_SCAN_PARA = 45, + BT_OP_GET_BT_BLE_SCAN_TYPE = 46, + BT_OP_GET_BT_DEVICE_INFO = 48, + BT_OP_GET_BT_FORBIDDEN_SLOT_VAL = 49, + BT_OP_MAX +}; + +enum btc_mp_h2c_req_num { + /* 4 bits only */ + BT_SEQ_DONT_CARE = 0, + BT_SEQ_GET_BT_VERSION = 0xE, + BT_SEQ_GET_AFH_MAP_L = 0x5, + BT_SEQ_GET_AFH_MAP_M = 0x6, + BT_SEQ_GET_AFH_MAP_H = 0x9, + BT_SEQ_GET_BT_COEX_SUPPORTED_FEATURE = 0x7, + BT_SEQ_GET_BT_COEX_SUPPORTED_VERSION = 0x8, + BT_SEQ_GET_BT_ANT_DET_VAL = 0x2, + BT_SEQ_GET_BT_BLE_SCAN_PARA = 0x3, + BT_SEQ_GET_BT_BLE_SCAN_TYPE = 0x4, + BT_SEQ_GET_BT_DEVICE_INFO = 0xA, + BT_SEQ_GET_BT_FORB_SLOT_VAL = 0xB, +}; + struct btc_coexist { /* make sure only one adapter can bind the data context */ bool binded; @@ -576,38 +579,57 @@ struct btc_coexist { struct btc_statistics statistics; u8 pwr_mode_val[10]; - /* function pointers - io related */ - bfp_btc_r1 btc_read_1byte; - bfp_btc_w1 btc_write_1byte; - bfp_btc_w1_bit_mak btc_write_1byte_bitmask; - bfp_btc_r2 btc_read_2byte; - bfp_btc_w2 btc_write_2byte; - bfp_btc_r4 btc_read_4byte; - bfp_btc_w4 btc_write_4byte; - bfp_btc_local_reg_w1 btc_write_local_reg_1byte; - - bfp_btc_set_bb_reg btc_set_bb_reg; - bfp_btc_get_bb_reg btc_get_bb_reg; - - bfp_btc_set_rf_reg btc_set_rf_reg; - bfp_btc_get_rf_reg btc_get_rf_reg; - - bfp_btc_fill_h2c btc_fill_h2c; + struct completion bt_mp_comp; - bfp_btc_disp_dbg_msg btc_disp_dbg_msg; - - bfp_btc_get btc_get; - bfp_btc_set btc_set; - - bfp_btc_set_bt_reg btc_set_bt_reg; + /* function pointers - io related */ + u8 (*btc_read_1byte)(void *btc_context, u32 reg_addr); + void (*btc_write_1byte)(void *btc_context, u32 reg_addr, u32 data); + void (*btc_write_1byte_bitmask)(void *btc_context, u32 reg_addr, + u32 bit_mask, u8 data1b); + u16 (*btc_read_2byte)(void *btc_context, u32 reg_addr); + void (*btc_write_2byte)(void *btc_context, u32 reg_addr, u16 data); + u32 (*btc_read_4byte)(void *btc_context, u32 reg_addr); + void (*btc_write_4byte)(void *btc_context, u32 reg_addr, u32 data); + + void (*btc_write_local_reg_1byte)(void *btc_context, u32 reg_addr, + u8 data); + void (*btc_set_bb_reg)(void *btc_context, u32 reg_addr, + u32 bit_mask, u32 data); + u32 (*btc_get_bb_reg)(void *btc_context, u32 reg_addr, + u32 bit_mask); + void (*btc_set_rf_reg)(void *btc_context, u8 rf_path, u32 reg_addr, + u32 bit_mask, u32 data); + u32 (*btc_get_rf_reg)(void *btc_context, u8 rf_path, + u32 reg_addr, u32 bit_mask); + + void (*btc_fill_h2c)(void *btc_context, u8 element_id, + u32 cmd_len, u8 *cmd_buffer); + + void (*btc_disp_dbg_msg)(void *btcoexist, u8 disp_type, + struct seq_file *m); + + bool (*btc_get)(void *btcoexist, u8 get_type, void *out_buf); + bool (*btc_set)(void *btcoexist, u8 set_type, void *in_buf); + + void (*btc_set_bt_reg)(void *btc_context, u8 reg_type, u32 offset, + u32 value); + u32 (*btc_get_bt_coex_supported_feature)(void *btcoexist); + u32 (*btc_get_bt_coex_supported_version)(void *btcoexist); + u8 (*btc_get_ant_det_val_from_bt)(void *btcoexist); + u8 (*btc_get_ble_scan_type_from_bt)(void *btcoexist); + u32 (*btc_get_ble_scan_para_from_bt)(void *btcoexist, u8 scan_type); + 
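/*
 * The btc_mp_h2c_req_num values above are deliberately 4 bits wide: the
 * sender packs them into the high nibble of cmd[0], and the firmware
 * echoes the nibble back in byte 2 of the reply, which is how
 * rtl_btc_btmpinfo_notify() matches a reply to its request.  The
 * encode/decode pair, condensed (helper names are illustrative):
 */
	#include <linux/types.h>

	static inline u8 btc_h2c_byte0(u8 oper_ver, u8 req_num)
	{
		return (oper_ver & 0x0f) | ((req_num << 4) & 0xf0);
	}

	static inline u8 btc_c2h_seq(const u8 *c2h_buf)
	{
		return c2h_buf[2] >> 4;		/* ReqNum nibble of the reply */
	}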
bool (*btc_get_bt_afh_map_from_bt)(void *btcoexist, u8 map_type, + u8 *afh_map); }; bool halbtc_is_wifi_uplink(struct rtl_priv *adapter); -extern struct btc_coexist gl_bt_coexist; +#define rtl_btc_coexist(rtlpriv) \ + ((struct btc_coexist *)((rtlpriv)->btcoexist.btc_context)) -bool exhalbtc_initlize_variables(void); +bool exhalbtc_initlize_variables(struct rtl_priv *rtlpriv); bool exhalbtc_bind_bt_coex_withadapter(void *adapter); +void exhalbtc_power_on_setting(struct btc_coexist *btcoexist); void exhalbtc_init_hw_config(struct btc_coexist *btcoexist, bool wifi_only); void exhalbtc_init_coex_dm(struct btc_coexist *btcoexist); void exhalbtc_ips_notify(struct btc_coexist *btcoexist, u8 type); @@ -627,11 +649,12 @@ void exhalbtc_periodical(struct btc_coexist *btcoexist); void exhalbtc_dbg_control(struct btc_coexist *btcoexist, u8 code, u8 len, u8 *data); void exhalbtc_stack_update_profile_info(void); -void exhalbtc_set_hci_version(u16 hci_version); -void exhalbtc_set_bt_patch_version(u16 bt_hci_version, u16 bt_patch_version); -void exhalbtc_update_min_bt_rssi(s8 bt_rssi); -void exhalbtc_set_bt_exist(bool bt_exist); -void exhalbtc_set_chip_type(u8 chip_type); +void exhalbtc_set_hci_version(struct btc_coexist *btcoexist, u16 hci_version); +void exhalbtc_set_bt_patch_version(struct btc_coexist *btcoexist, + u16 bt_hci_version, u16 bt_patch_version); +void exhalbtc_update_min_bt_rssi(struct btc_coexist *btcoexist, s8 bt_rssi); +void exhalbtc_set_bt_exist(struct btc_coexist *btcoexist, bool bt_exist); +void exhalbtc_set_chip_type(struct btc_coexist *btcoexist, u8 chip_type); void exhalbtc_set_ant_num(struct rtl_priv *rtlpriv, u8 type, u8 ant_num); void exhalbtc_display_bt_coex_info(struct btc_coexist *btcoexist, struct seq_file *m); @@ -639,6 +662,7 @@ void exhalbtc_signal_compensation(struct btc_coexist *btcoexist, u8 *rssi_wifi, u8 *rssi_bt); void exhalbtc_lps_leave(struct btc_coexist *btcoexist); void exhalbtc_low_wifi_traffic_notify(struct btc_coexist *btcoexist); -void exhalbtc_set_single_ant_path(u8 single_ant_path); +void exhalbtc_set_single_ant_path(struct btc_coexist *btcoexist, + u8 single_ant_path); #endif diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c index 4d9e33078d4f..714c0de099e5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c @@ -31,7 +31,9 @@ static struct rtl_btc_ops rtl_btc_operation = { .btc_init_variables = rtl_btc_init_variables, + .btc_deinit_variables = rtl_btc_deinit_variables, .btc_init_hal_vars = rtl_btc_init_hal_vars, + .btc_power_on_setting = rtl_btc_power_on_setting, .btc_init_hw_config = rtl_btc_init_hw_config, .btc_ips_notify = rtl_btc_ips_notify, .btc_lps_notify = rtl_btc_lps_notify, @@ -57,58 +59,116 @@ static struct rtl_btc_ops rtl_btc_operation = { void rtl_btc_display_bt_coex_info(struct rtl_priv *rtlpriv, struct seq_file *m) { - exhalbtc_display_bt_coex_info(&gl_bt_coexist, m); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) { + seq_puts(m, "btc_coexist context is NULL!\n"); + return; + } + + exhalbtc_display_bt_coex_info(btcoexist, m); } void rtl_btc_record_pwr_mode(struct rtl_priv *rtlpriv, u8 *buf, u8 len) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); u8 safe_len; - safe_len = sizeof(gl_bt_coexist.pwr_mode_val); + if (!btcoexist) + return; + + safe_len = sizeof(btcoexist->pwr_mode_val); if (safe_len > len) safe_len = len; - 
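/*
 * The rtl_btc_coexist() macro above replaces the former global
 * gl_bt_coexist with a per-adapter context hung off
 * rtlpriv->btcoexist.btc_context, so two adapters no longer share one
 * set of coexistence state.  The price is that the kzalloc() can fail
 * (rtl_btc_alloc_variable() does not check it), so every exported
 * wrapper starts with the same NULL guard and returns a safe default.
 * The pattern in miniature (example_get_lps_val is hypothetical; the
 * real wrappers follow below):
 */
	static u8 example_get_lps_val(struct rtl_priv *rtlpriv)
	{
		struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv);

		if (!btcoexist)		/* no coex context: safe default */
			return 0;
		return btcoexist->bt_info.lps_val;
	}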
memcpy(gl_bt_coexist.pwr_mode_val, buf, safe_len); + memcpy(btcoexist->pwr_mode_val, buf, safe_len); } u8 rtl_btc_get_lps_val(struct rtl_priv *rtlpriv) { - return gl_bt_coexist.bt_info.lps_val; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return 0; + + return btcoexist->bt_info.lps_val; } u8 rtl_btc_get_rpwm_val(struct rtl_priv *rtlpriv) { - return gl_bt_coexist.bt_info.rpwm_val; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return 0; + + return btcoexist->bt_info.rpwm_val; } bool rtl_btc_is_bt_ctrl_lps(struct rtl_priv *rtlpriv) { - return gl_bt_coexist.bt_info.bt_ctrl_lps; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return false; + + return btcoexist->bt_info.bt_ctrl_lps; } bool rtl_btc_is_bt_lps_on(struct rtl_priv *rtlpriv) { - return gl_bt_coexist.bt_info.bt_lps_on; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return false; + + return btcoexist->bt_info.bt_lps_on; } void rtl_btc_get_ampdu_cfg(struct rtl_priv *rtlpriv, u8 *reject_agg, u8 *ctrl_agg_size, u8 *agg_size) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) { + *reject_agg = false; + *ctrl_agg_size = false; + return; + } + if (reject_agg) - *reject_agg = gl_bt_coexist.bt_info.reject_agg_pkt; + *reject_agg = btcoexist->bt_info.reject_agg_pkt; if (ctrl_agg_size) - *ctrl_agg_size = gl_bt_coexist.bt_info.bt_ctrl_agg_buf_size; + *ctrl_agg_size = btcoexist->bt_info.bt_ctrl_agg_buf_size; if (agg_size) - *agg_size = gl_bt_coexist.bt_info.agg_buf_size; + *agg_size = btcoexist->bt_info.agg_buf_size; +} + +static void rtl_btc_alloc_variable(struct rtl_priv *rtlpriv, bool wifi_only) +{ + rtlpriv->btcoexist.btc_context = + kzalloc(sizeof(struct btc_coexist), GFP_KERNEL); +} + +static void rtl_btc_free_variable(struct rtl_priv *rtlpriv) +{ + kfree(rtlpriv->btcoexist.btc_context); + rtlpriv->btcoexist.btc_context = NULL; } void rtl_btc_init_variables(struct rtl_priv *rtlpriv) { - exhalbtc_initlize_variables(); + rtl_btc_alloc_variable(rtlpriv, false); + + exhalbtc_initlize_variables(rtlpriv); exhalbtc_bind_bt_coex_withadapter(rtlpriv); } +void rtl_btc_deinit_variables(struct rtl_priv *rtlpriv) +{ + rtl_btc_free_variable(rtlpriv); +} + void rtl_btc_init_hal_vars(struct rtl_priv *rtlpriv) { /* move ant_num, bt_type and single_ant_path to @@ -116,67 +176,125 @@ void rtl_btc_init_hal_vars(struct rtl_priv *rtlpriv) */ } +void rtl_btc_power_on_setting(struct rtl_priv *rtlpriv) +{ + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_power_on_setting(btcoexist); +} + void rtl_btc_init_hw_config(struct rtl_priv *rtlpriv) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + u8 bt_exist; bt_exist = rtl_get_hwpg_bt_exist(rtlpriv); RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "%s, bt_exist is %d\n", __func__, bt_exist); - exhalbtc_init_hw_config(&gl_bt_coexist, !bt_exist); - exhalbtc_init_coex_dm(&gl_bt_coexist); + if (!btcoexist) + return; + + exhalbtc_init_hw_config(btcoexist, !bt_exist); + exhalbtc_init_coex_dm(btcoexist); } void rtl_btc_ips_notify(struct rtl_priv *rtlpriv, u8 type) { - exhalbtc_ips_notify(&gl_bt_coexist, type); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_ips_notify(btcoexist, type); } void rtl_btc_lps_notify(struct rtl_priv *rtlpriv, u8 type) { - exhalbtc_lps_notify(&gl_bt_coexist, type); + struct btc_coexist *btcoexist = 
rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_lps_notify(btcoexist, type); } void rtl_btc_scan_notify(struct rtl_priv *rtlpriv, u8 scantype) { - exhalbtc_scan_notify(&gl_bt_coexist, scantype); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_scan_notify(btcoexist, scantype); } void rtl_btc_connect_notify(struct rtl_priv *rtlpriv, u8 action) { - exhalbtc_connect_notify(&gl_bt_coexist, action); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_connect_notify(btcoexist, action); } void rtl_btc_mediastatus_notify(struct rtl_priv *rtlpriv, enum rt_media_status mstatus) { - exhalbtc_mediastatus_notify(&gl_bt_coexist, mstatus); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_mediastatus_notify(btcoexist, mstatus); } void rtl_btc_periodical(struct rtl_priv *rtlpriv) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + /*rtl_bt_dm_monitor();*/ - exhalbtc_periodical(&gl_bt_coexist); + exhalbtc_periodical(btcoexist); } -void rtl_btc_halt_notify(void) +void rtl_btc_halt_notify(struct rtl_priv *rtlpriv) { - struct btc_coexist *btcoexist = &gl_bt_coexist; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; exhalbtc_halt_notify(btcoexist); } void rtl_btc_btinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length) { - exhalbtc_bt_info_notify(&gl_bt_coexist, tmp_buf, length); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + exhalbtc_bt_info_notify(btcoexist, tmp_buf, length); } void rtl_btc_btmpinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); u8 extid, seq, len; u16 bt_real_fw_ver; u8 bt_fw_ver; + u8 *data; + + if (!btcoexist) + return; if ((length < 4) || (!tmp_buf)) return; @@ -188,20 +306,70 @@ void rtl_btc_btmpinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length) len = tmp_buf[1] >> 4; seq = tmp_buf[2] >> 4; + data = &tmp_buf[3]; /* BT Firmware version response */ - if (seq == 0x0E) { + switch (seq) { + case BT_SEQ_GET_BT_VERSION: bt_real_fw_ver = tmp_buf[3] | (tmp_buf[4] << 8); bt_fw_ver = tmp_buf[5]; - gl_bt_coexist.bt_info.bt_real_fw_ver = bt_real_fw_ver; - gl_bt_coexist.bt_info.bt_fw_ver = bt_fw_ver; + btcoexist->bt_info.bt_real_fw_ver = bt_real_fw_ver; + btcoexist->bt_info.bt_fw_ver = bt_fw_ver; + break; + case BT_SEQ_GET_AFH_MAP_L: + btcoexist->bt_info.afh_map_l = le32_to_cpu(*(__le32 *)data); + break; + case BT_SEQ_GET_AFH_MAP_M: + btcoexist->bt_info.afh_map_m = le32_to_cpu(*(__le32 *)data); + break; + case BT_SEQ_GET_AFH_MAP_H: + btcoexist->bt_info.afh_map_h = le16_to_cpu(*(__le16 *)data); + break; + case BT_SEQ_GET_BT_COEX_SUPPORTED_FEATURE: + btcoexist->bt_info.bt_supported_feature = tmp_buf[3] | + (tmp_buf[4] << 8); + break; + case BT_SEQ_GET_BT_COEX_SUPPORTED_VERSION: + btcoexist->bt_info.bt_supported_version = tmp_buf[3] | + (tmp_buf[4] << 8); + break; + case BT_SEQ_GET_BT_ANT_DET_VAL: + btcoexist->bt_info.bt_ant_det_val = tmp_buf[3]; + break; + case BT_SEQ_GET_BT_BLE_SCAN_PARA: + btcoexist->bt_info.bt_ble_scan_para = tmp_buf[3] | + (tmp_buf[4] << 8) | + (tmp_buf[5] << 16) | + (tmp_buf[6] << 24); + break; + case BT_SEQ_GET_BT_BLE_SCAN_TYPE: + btcoexist->bt_info.bt_ble_scan_type = tmp_buf[3]; + break; + case BT_SEQ_GET_BT_DEVICE_INFO: + btcoexist->bt_info.bt_device_info = + le32_to_cpu(*(__le32 
*)data); + break; + case BT_SEQ_GET_BT_FORB_SLOT_VAL: + btcoexist->bt_info.bt_forb_slot_val = + le32_to_cpu(*(__le32 *)data); + break; + } + + RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_LOUD, + "btmpinfo complete req_num=%d\n", seq); + + complete(&btcoexist->bt_mp_comp); } bool rtl_btc_is_limited_dig(struct rtl_priv *rtlpriv) { - return gl_bt_coexist.bt_info.limited_dig; + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return false; + + return btcoexist->bt_info.limited_dig; } bool rtl_btc_is_disable_edca_turbo(struct rtl_priv *rtlpriv) @@ -233,8 +401,13 @@ bool rtl_btc_is_disable_edca_turbo(struct rtl_priv *rtlpriv) bool rtl_btc_is_bt_disabled(struct rtl_priv *rtlpriv) { + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return true; + /* It seems 'bt_disabled' is never initialized or set. */ - if (gl_bt_coexist.bt_info.bt_disabled) + if (btcoexist->bt_info.bt_disabled) return true; else return false; @@ -242,7 +415,12 @@ bool rtl_btc_is_bt_disabled(struct rtl_priv *rtlpriv) void rtl_btc_special_packet_notify(struct rtl_priv *rtlpriv, u8 pkt_type) { - return exhalbtc_special_packet_notify(&gl_bt_coexist, pkt_type); + struct btc_coexist *btcoexist = rtl_btc_coexist(rtlpriv); + + if (!btcoexist) + return; + + return exhalbtc_special_packet_notify(btcoexist, pkt_type); } struct rtl_btc_ops *rtl_btc_get_ops_pointer(void) diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h index 40f1ce8c8a06..8c5098266039 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h @@ -28,7 +28,9 @@ #include "halbt_precomp.h" void rtl_btc_init_variables(struct rtl_priv *rtlpriv); +void rtl_btc_deinit_variables(struct rtl_priv *rtlpriv); void rtl_btc_init_hal_vars(struct rtl_priv *rtlpriv); +void rtl_btc_power_on_setting(struct rtl_priv *rtlpriv); void rtl_btc_init_hw_config(struct rtl_priv *rtlpriv); void rtl_btc_ips_notify(struct rtl_priv *rtlpriv, u8 type); void rtl_btc_lps_notify(struct rtl_priv *rtlpriv, u8 type); @@ -37,7 +39,7 @@ void rtl_btc_connect_notify(struct rtl_priv *rtlpriv, u8 action); void rtl_btc_mediastatus_notify(struct rtl_priv *rtlpriv, enum rt_media_status mstatus); void rtl_btc_periodical(struct rtl_priv *rtlpriv); -void rtl_btc_halt_notify(void); +void rtl_btc_halt_notify(struct rtl_priv *rtlpriv); void rtl_btc_btinfo_notify(struct rtl_priv *rtlpriv, u8 *tmpbuf, u8 length); void rtl_btc_btmpinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length); bool rtl_btc_is_limited_dig(struct rtl_priv *rtlpriv); diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index a78b828f531a..a16aa94273e8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1162,6 +1162,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG, "BSS_CHANGED_ASSOC\n"); } else { + struct cfg80211_bss *bss = NULL; + mstatus = RT_MEDIA_DISCONNECT; if (mac->link_state == MAC80211_LINKED) @@ -1169,6 +1171,22 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, if (ppsc->p2p_ps_info.p2p_ps_mode > P2P_PS_NONE) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); mac->link_state = MAC80211_NOLINK; + + bss = cfg80211_get_bss(hw->wiphy, NULL, + (u8 *)mac->bssid, NULL, 0, + IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY_OFF); + + RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG,
+ "bssid = %pMF\n", mac->bssid); + + if (bss) { + cfg80211_unlink_bss(hw->wiphy, bss); + cfg80211_put_bss(hw->wiphy, bss); + RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG, + "cfg80211_unlink !!\n"); + } + eth_zero_addr(mac->bssid); mac->vendor = PEER_UNKNOWN; mac->mode = 0; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index c1833a501be4..aa1d3ae4937f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1841,7 +1841,10 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) u8 rf_timeout = 0; if (rtlpriv->cfg->ops->get_btc_status()) - rtlpriv->btcoexist.btc_ops->btc_halt_notify(); + rtlpriv->btcoexist.btc_ops->btc_halt_notify(rtlpriv); + + if (rtlpriv->btcoexist.btc_ops) + rtlpriv->btcoexist.btc_ops->btc_deinit_variables(rtlpriv); /*should be before disable interrupt&adapter *and will do it immediately. diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 24c87fae5382..71af24e2e051 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -51,6 +51,11 @@ bool rtl_ps_enable_nic(struct ieee80211_hw *hw) &rtlmac->retry_long); RT_CLEAR_PS_LEVEL(ppsc, RT_RF_OFF_LEVL_HALT_NIC); + rtlpriv->cfg->ops->switch_channel(hw); + rtlpriv->cfg->ops->set_channel_access(hw); + rtlpriv->cfg->ops->set_bw_mode(hw, + cfg80211_get_chandef_type(&hw->conf.chandef)); + /*<3> Enable Interrupt */ rtlpriv->cfg->ops->enable_interrupt(hw); @@ -289,7 +294,7 @@ void rtl_ips_nic_on(struct ieee80211_hw *hw) cancel_delayed_work(&rtlpriv->works.ips_nic_off_wq); - spin_lock(&rtlpriv->locks.ips_lock); + mutex_lock(&rtlpriv->locks.ips_mutex); if (ppsc->inactiveps) { rtstate = ppsc->rfpwr_state; @@ -306,7 +311,7 @@ void rtl_ips_nic_on(struct ieee80211_hw *hw) ppsc->inactive_pwrstate); } } - spin_unlock(&rtlpriv->locks.ips_lock); + mutex_unlock(&rtlpriv->locks.ips_mutex); } EXPORT_SYMBOL_GPL(rtl_ips_nic_on); @@ -415,7 +420,6 @@ static void rtl_lps_enter_core(struct ieee80211_hw *hw) struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_priv *rtlpriv = rtl_priv(hw); - unsigned long flag; if (!ppsc->fwctrl_lps) return; @@ -436,7 +440,7 @@ static void rtl_lps_enter_core(struct ieee80211_hw *hw) if (mac->link_state != MAC80211_LINKED) return; - spin_lock_irqsave(&rtlpriv->locks.lps_lock, flag); + mutex_lock(&rtlpriv->locks.lps_mutex); /* Don't need to check (ppsc->dot11_psmode == EACTIVE), because * bt_ccoexist may ask to enter lps. 
@@ -446,7 +450,7 @@ static void rtl_lps_enter_core(struct ieee80211_hw *hw) "Enter 802.11 power save mode...\n"); rtl_lps_set_psmode(hw, EAUTOPS); - spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); + mutex_unlock(&rtlpriv->locks.lps_mutex); } /* Interrupt safe routine to leave the leisure power save mode.*/ @@ -455,9 +459,8 @@ static void rtl_lps_leave_core(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); - unsigned long flag; - spin_lock_irqsave(&rtlpriv->locks.lps_lock, flag); + mutex_lock(&rtlpriv->locks.lps_mutex); if (ppsc->fwctrl_lps) { if (ppsc->dot11_psmode != EACTIVE) { @@ -478,7 +481,7 @@ static void rtl_lps_leave_core(struct ieee80211_hw *hw) rtl_lps_set_psmode(hw, EACTIVE); } } - spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); + mutex_unlock(&rtlpriv->locks.lps_mutex); } /* For sw LPS*/ @@ -568,7 +571,6 @@ void rtl_swlps_rf_awake(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); - unsigned long flag; if (!rtlpriv->psc.swctrl_lps) return; @@ -581,9 +583,9 @@ void rtl_swlps_rf_awake(struct ieee80211_hw *hw) RT_CLEAR_PS_LEVEL(ppsc, RT_PS_LEVEL_ASPM); } - spin_lock_irqsave(&rtlpriv->locks.lps_lock, flag); + mutex_lock(&rtlpriv->locks.lps_mutex); rtl_ps_set_rf_state(hw, ERFON, RF_CHANGE_BY_PS); - spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); + mutex_unlock(&rtlpriv->locks.lps_mutex); } void rtl_swlps_rfon_wq_callback(void *data) @@ -600,7 +602,6 @@ void rtl_swlps_rf_sleep(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); - unsigned long flag; u8 sleep_intv; if (!rtlpriv->psc.sw_ps_enabled) @@ -624,9 +625,9 @@ void rtl_swlps_rf_sleep(struct ieee80211_hw *hw) } spin_unlock(&rtlpriv->locks.rf_ps_lock); - spin_lock_irqsave(&rtlpriv->locks.lps_lock, flag); + mutex_lock(&rtlpriv->locks.lps_mutex); rtl_ps_set_rf_state(hw, ERFSLEEP, RF_CHANGE_BY_PS); - spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); + mutex_unlock(&rtlpriv->locks.lps_mutex); if (ppsc->reg_rfps_level & RT_RF_OFF_LEVL_ASPM && !RT_IN_PS_LEVEL(ppsc, RT_PS_LEVEL_ASPM)) { diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 39b033b3b53a..ce3103bb8ebb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -962,7 +962,6 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); if (ieee80211_is_auth(fc)) { RT_TRACE(rtlpriv, COMP_SEND, DBG_DMESG, "MAC80211_LINKING\n"); - rtl_ips_nic_on(hw); } if (rtlpriv->psc.sw_ps_enabled) { diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 0b1c54381a2f..531c86df54d4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2325,17 +2325,14 @@ struct rtl_hal_cfg { struct rtl_locks { /* mutex */ struct mutex conf_mutex; - struct mutex ps_mutex; + struct mutex ips_mutex; /* mutex for enter/leave IPS */ + struct mutex lps_mutex; /* mutex for enter/leave LPS */ /*spin lock */ - spinlock_t ips_lock; spinlock_t irq_th_lock; - spinlock_t irq_pci_lock; - spinlock_t tx_lock; spinlock_t h2c_lock; spinlock_t rf_ps_lock; spinlock_t rf_lock; - spinlock_t lps_lock; spinlock_t 
waitq_lock; spinlock_t entry_list_lock; spinlock_t usb_lock; @@ -2348,9 +2345,6 @@ struct rtl_locks { /*Dual mac*/ spinlock_t cck_and_rw_pagea_lock; - /*Easy concurrent*/ - spinlock_t check_sendpkt_lock; - spinlock_t iqk_lock; }; @@ -2506,6 +2500,8 @@ struct rtl_btc_info { struct bt_coexist_info { struct rtl_btc_ops *btc_ops; struct rtl_btc_info btc_info; + /* btc context */ + void *btc_context; /* EEPROM BT info. */ u8 eeprom_bt_coexist; u8 eeprom_bt_type; @@ -2562,7 +2558,9 @@ struct bt_coexist_info { struct rtl_btc_ops { void (*btc_init_variables) (struct rtl_priv *rtlpriv); + void (*btc_deinit_variables)(struct rtl_priv *rtlpriv); void (*btc_init_hal_vars) (struct rtl_priv *rtlpriv); + void (*btc_power_on_setting)(struct rtl_priv *rtlpriv); void (*btc_init_hw_config) (struct rtl_priv *rtlpriv); void (*btc_ips_notify) (struct rtl_priv *rtlpriv, u8 type); void (*btc_lps_notify)(struct rtl_priv *rtlpriv, u8 type); @@ -2571,7 +2569,7 @@ struct rtl_btc_ops { void (*btc_mediastatus_notify) (struct rtl_priv *rtlpriv, enum rt_media_status mstatus); void (*btc_periodical) (struct rtl_priv *rtlpriv); - void (*btc_halt_notify) (void); + void (*btc_halt_notify)(struct rtl_priv *rtlpriv); void (*btc_btinfo_notify) (struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length); void (*btc_btmpinfo_notify)(struct rtl_priv *rtlpriv, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d53550e612bc..4276ebfff22b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -451,10 +451,13 @@ static void **nvme_pci_iod_list(struct request *req) static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + int nseg = blk_rq_nr_phys_segments(req); unsigned int avg_seg_size; - avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), - blk_rq_nr_phys_segments(req)); + if (nseg == 0) + return false; + + avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) return false; @@ -722,20 +725,19 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, } static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, - struct request *req, struct nvme_rw_command *cmd) + struct request *req, struct nvme_rw_command *cmd, int entries) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - int length = blk_rq_payload_bytes(req); struct dma_pool *pool; struct nvme_sgl_desc *sg_list; struct scatterlist *sg = iod->sg; - int entries = iod->nents, i = 0; dma_addr_t sgl_dma; + int i = 0; /* setting the transfer type as SGL */ cmd->flags = NVME_CMD_SGL_METABUF; - if (length == sg_dma_len(sg)) { + if (entries == 1) { nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg); return BLK_STS_OK; } @@ -775,13 +777,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, } nvme_pci_sgl_set_data(&sg_list[i++], sg); - - length -= sg_dma_len(sg); sg = sg_next(sg); - entries--; - } while (length > 0); + } while (--entries > 0); - WARN_ON(entries > 0); return BLK_STS_OK; } @@ -793,6 +791,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, enum dma_data_direction dma_dir = rq_data_dir(req) ? 
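/*
 * Two related fixes in the nvme hunks here: (1) a request with zero
 * physical segments would make DIV_ROUND_UP() divide by zero, so
 * nvme_pci_use_sgls() now bails out first; (2) dma_map_sg_attrs() may
 * coalesce adjacent entries, so the SGL builder must walk exactly the
 * *mapped* count it returns rather than the pre-mapping iod->nents or
 * a byte countdown.  The call-site shape (condensed from the patch):
 */
	nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
				     dma_dir, DMA_ATTR_NO_WARN);
	if (!nr_mapped)			/* mapping failed */
		goto out;
	/* nr_mapped <= iod->nents: build exactly nr_mapped descriptors */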
DMA_TO_DEVICE : DMA_FROM_DEVICE; blk_status_t ret = BLK_STS_IOERR; + int nr_mapped; sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); @@ -800,12 +799,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out; ret = BLK_STS_RESOURCE; - if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, - DMA_ATTR_NO_WARN)) + nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, + DMA_ATTR_NO_WARN); + if (!nr_mapped) goto out; if (iod->use_sgl) - ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); + ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index b4964b067aec..8f6e8e28996d 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -410,6 +410,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index) if (ret) return ERR_PTR(-ENODEV); + /* This phy type handled by the usb-phy subsystem for now */ + if (of_device_is_compatible(args.np, "usb-nop-xceiv")) + return ERR_PTR(-ENODEV); + mutex_lock(&phy_provider_mutex); phy_provider = of_phy_provider_lookup(args.np); if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) { diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 58476b728c57..c9406852c3e9 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -486,15 +486,28 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type, int sas_eh_abort_handler(struct scsi_cmnd *cmd) { - int res; + int res = TMF_RESP_FUNC_FAILED; struct sas_task *task = TO_SAS_TASK(cmd); struct Scsi_Host *host = cmd->device->host; + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_internal *i = to_sas_internal(host->transportt); + unsigned long flags; if (!i->dft->lldd_abort_task) return FAILED; - res = i->dft->lldd_abort_task(task); + spin_lock_irqsave(host->host_lock, flags); + /* We cannot do async aborts for SATA devices */ + if (dev_is_sata(dev) && !host->host_eh_scheduled) { + spin_unlock_irqrestore(host->host_lock, flags); + return FAILED; + } + spin_unlock_irqrestore(host->host_lock, flags); + + if (task) + res = i->dft->lldd_abort_task(task); + else + SAS_DPRINTK("no task to abort\n"); if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) return SUCCESS; diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index f48a2ee587a4..ee18428a051f 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -31,7 +31,7 @@ config SSB_BLOCKIO config SSB_PCIHOST_POSSIBLE bool - depends on SSB && (PCI = y || PCI = SSB) + depends on SSB && (PCI = y || PCI = SSB) && PCI_DRIVERS_LEGACY default y config SSB_PCIHOST diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 33ac2b186b85..5727b186b3ca 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -904,7 +904,7 @@ static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) - mutex_lock(&d->vqs[i]->mutex); + mutex_lock_nested(&d->vqs[i]->mutex, i); } static void vhost_dev_unlock_vqs(struct vhost_dev *d) @@ -1015,6 +1015,10 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: + if (!dev->iotlb) { + ret = -EFAULT; + break; + } vhost_vq_meta_reset(dev); vhost_del_umem_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 
f650e475d8f0..fdf2aad73470 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -60,10 +60,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; - - /* Each thread allocates its own gi, no race */ - groups_sort(gi); } + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } else { gi = get_group_info(rqgi); } diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index ded456f17de6..c584ad8d023c 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file, struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - struct orangefs_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ @@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file, return -EAGAIN; restart: + cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 835c6e148afc..0577d6dba8c8 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s */ void purge_waiting_ops(void) { - struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); - list_for_each_entry(op, &orangefs_request_list, list) { + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), diff --git a/fs/proc/array.c b/fs/proc/array.c index 79375fc115d2..d67a72dcb92c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -430,8 +430,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * safe because the task has stopped executing permanently. */ if (permitted && (task->flags & PF_DUMPCORE)) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); + if (try_get_task_stack(task)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + put_task_stack(task); + } } } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index dc1ebfeeb5ec..f05b9b6cd43f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -56,6 +56,8 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? 
\ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +#define ACPI_HANDLE_FWNODE(fwnode) \ + acpi_device_handle(to_acpi_device_node(fwnode)) static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { @@ -626,6 +628,7 @@ int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count) #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5c2c104dc2c5..66df387106de 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -234,6 +234,8 @@ struct bpf_prog_offload { struct list_head offloads; bool dev_state; const struct bpf_prog_offload_ops *dev_ops; + void *jited_image; + u32 jited_len; }; struct bpf_prog_aux { @@ -584,6 +586,8 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog); +int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map); + int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_map_offload_update_elem(struct bpf_map *map, void *key, void *value, u64 flags); diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2272ded07496..631354acfa72 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -219,7 +219,7 @@ /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -#ifdef RANDSTRUCT_PLUGIN +#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) #define __randomize_layout __attribute__((randomize_layout)) #define __no_randomize_layout __attribute__((no_randomize_layout)) #endif diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 4178d2493547..5e335b6203f4 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -71,7 +71,7 @@ extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); -extern void __delayacct_blkio_end(void); +extern void __delayacct_blkio_end(struct task_struct *); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); @@ -122,10 +122,10 @@ static inline void delayacct_blkio_start(void) __delayacct_blkio_start(); } -static inline void delayacct_blkio_end(void) +static inline void delayacct_blkio_end(struct task_struct *p) { if (current->delays) - __delayacct_blkio_end(); + __delayacct_blkio_end(p); delayacct_clear_flag(DELAYACCT_PF_BLKIO); } @@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} -static inline void delayacct_blkio_end(void) +static inline void delayacct_blkio_end(struct task_struct *p) {} static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2bab81951ced..3319df9727aa 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -332,6 +332,8 @@ extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); +struct ftrace_ops 
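/*
 * The delayacct change above threads the affected task through
 * delayacct_blkio_end() instead of assuming current: the hook can now
 * run from a context where current is not the task being accounted
 * (for instance the waker of a task blocked on I/O), so callers pass
 * the task explicitly and the block-I/O delay is charged to the task
 * that actually slept.  Call shape at a wake-up site (illustrative):
 */
	delayacct_blkio_end(p);		/* p: the task leaving the blkio wait */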
*ftrace_ops_trampoline(unsigned long addr); + bool is_ftrace_trampoline(unsigned long addr); /* diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 1ac5bf95bfdd..e16fe7d44a71 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -173,7 +173,7 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); +int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *); void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index acd829d8613b..199bfcd2f2ce 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1031,7 +1031,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_hairpin_queues[0x5]; u8 reserved_at_3c8[0x3]; u8 log_max_hairpin_wq_data_sz[0x5]; - u8 reserved_at_3d0[0xb]; + u8 reserved_at_3d0[0x3]; + u8 log_max_hairpin_num_packets[0x5]; + u8 reserved_at_3d8[0x3]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; @@ -1172,7 +1174,9 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0xb]; + u8 reserved_at_120[0x3]; + u8 log_hairpin_num_packets[0x5]; + u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; u8 reserved_at_130[0x5]; diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index a228310c1968..7e8f281f8c00 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -77,15 +77,19 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); struct mlx5_hairpin_params { u8 log_data_size; + u8 log_num_packets; u16 q_counter; + int num_channels; }; struct mlx5_hairpin { struct mlx5_core_dev *func_mdev; struct mlx5_core_dev *peer_mdev; - u32 rqn; - u32 sqn; + int num_channels; + + u32 *rqn; + u32 *sqn; }; struct mlx5_hairpin * diff --git a/include/linux/net.h b/include/linux/net.h index caeb159abda5..68acc54976bf 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -306,7 +306,6 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); /* Routine returns the IP overhead imposed by a (caller-protected) socket. */ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index 1c7e45016120..bebeaad897cc 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -244,10 +244,17 @@ static inline int net_dim_stats_compare(struct net_dim_stats *curr, return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER : NET_DIM_STATS_WORSE; + if (!prev->ppms) + return curr->ppms ? NET_DIM_STATS_BETTER : + NET_DIM_STATS_SAME; + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER : NET_DIM_STATS_WORSE; + if (!prev->epms) + return NET_DIM_STATS_SAME; + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) return (curr->epms < prev->epms) ? 
NET_DIM_STATS_BETTER : NET_DIM_STATS_WORSE; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ed0799a12bf2..24a62d590350 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1469,8 +1469,6 @@ enum netdev_priv_flags { * @base_addr: Device I/O address * @irq: Device IRQ number * - * @carrier_changes: Stats to monitor carrier on<->off transitions - * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices * @napi_list: List entry used for polling NAPI devices @@ -1506,6 +1504,8 @@ enum netdev_priv_flags { * do not use this in drivers * @rx_nohandler: nohandler dropped packets by core network on * inactive devices, do not use this in drivers + * @carrier_up_count: Number of times the carrier has been up + * @carrier_down_count: Number of times the carrier has been down * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@ -1680,8 +1680,6 @@ struct net_device { unsigned long base_addr; int irq; - atomic_t carrier_changes; - /* * Some hardware also needs these fields (state,dev_list, * napi_list,unreg_list,close_list) but they are not @@ -1719,6 +1717,10 @@ struct net_device { atomic_long_t tx_dropped; atomic_long_t rx_nohandler; + /* Stats to monitor link on/off, flapping */ + atomic_t carrier_up_count; + atomic_t carrier_down_count; + #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def *wireless_handlers; struct iw_public_data *wireless_data; @@ -2759,7 +2761,8 @@ static inline bool dev_validate_header(const struct net_device *dev, return false; } -typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); +typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, + int len, int size); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) { @@ -3312,7 +3315,9 @@ int netdev_rx_handler_register(struct net_device *dev, void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); -int dev_ioctl(struct net *net, unsigned int cmd, void __user *); +int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, + bool *need_copyout); +int dev_ifconf(struct net *net, struct ifconf *, int); int dev_ethtool(struct net *net, struct ifreq *); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *, unsigned int flags); diff --git a/include/linux/property.h b/include/linux/property.h index f6189a3ac63c..5b0563ad79a5 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -83,11 +83,17 @@ struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); +struct fwnode_handle *fwnode_get_next_available_child_node( + const struct fwnode_handle *fwnode, struct fwnode_handle *child); #define fwnode_for_each_child_node(fwnode, child) \ for (child = fwnode_get_next_child_node(fwnode, NULL); child; \ child = fwnode_get_next_child_node(fwnode, child)) +#define fwnode_for_each_available_child_node(fwnode, child) \ + for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ + child = fwnode_get_next_available_child_node(fwnode, child)) + struct fwnode_handle *device_get_next_child_node( struct device *dev, struct fwnode_handle *child); @@ -103,6 +109,8 @@ struct fwnode_handle *device_get_named_child_node(struct device *dev, 
struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode); void fwnode_handle_put(struct fwnode_handle *fwnode); +int fwnode_irq_get(struct fwnode_handle *fwnode, unsigned int index); + unsigned int device_get_child_node_count(struct device *dev); static inline bool device_property_read_bool(struct device *dev, @@ -279,6 +287,9 @@ int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); +int fwnode_get_phy_mode(struct fwnode_handle *fwnode); +void *fwnode_get_mac_address(struct fwnode_handle *fwnode, + char *addr, int alen); struct fwnode_handle *fwnode_graph_get_next_endpoint( const struct fwnode_handle *fwnode, struct fwnode_handle *prev); struct fwnode_handle * diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 9c5a2628d6ce..1d3877c39a00 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return pfn_to_page(swp_offset(entry)); @@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return false; } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return NULL; @@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset(entry)); @@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } + +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { return NULL; diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index fe328c52c46b..801489bb14c3 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -32,6 +32,33 @@ void cfpkt_destroy(struct cfpkt *pkt); */ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len); +static inline u8 cfpkt_extr_head_u8(struct cfpkt *pkt) +{ + u8 tmp; + + cfpkt_extr_head(pkt, &tmp, 1); + + return tmp; +} + +static inline u16 cfpkt_extr_head_u16(struct cfpkt *pkt) +{ + __le16 tmp; + + cfpkt_extr_head(pkt, &tmp, 2); + + return le16_to_cpu(tmp); +} + +static inline u32 cfpkt_extr_head_u32(struct cfpkt *pkt) +{ + __le32 tmp; + + cfpkt_extr_head(pkt, &tmp, 4); + + return le32_to_cpu(tmp); +} + /* * Peek header from packet. * Reads data from packet without changing packet. 
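The cfpkt_extr_head_u8()/_u16()/_u32() helpers added above all follow one pattern: pop a fixed-width field off the packet head, then convert it from the wire's little-endian layout to host order. A minimal userspace sketch of that pattern; struct pkt and these function names are illustrative stand-ins, not the CAIF API:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for struct cfpkt: a read cursor over a buffer. */
struct pkt {
        const uint8_t *data;
        size_t len;
};

/* Same contract as cfpkt_extr_head(): pop len bytes off the packet head. */
static int extr_head(struct pkt *p, void *dst, size_t len)
{
        if (p->len < len)
                return -1;
        memcpy(dst, p->data, len);
        p->data += len;
        p->len -= len;
        return 0;
}

/* Like cfpkt_extr_head_u16(): fixed-width pop plus LE-to-host conversion.
 * As in the kernel helpers, the underlying error code is not propagated;
 * this sketch zero-fills instead of returning uninitialized bytes.
 */
static uint16_t extr_head_u16(struct pkt *p)
{
        uint8_t b[2] = { 0, 0 };

        extr_head(p, b, 2);
        return (uint16_t)(b[0] | (b[1] << 8)); /* le16_to_cpu() by hand */
}

int main(void)
{
        const uint8_t wire[] = { 0x05, 0x34, 0x12 }; /* u8 5, then LE u16 0x1234 */
        struct pkt p = { wire, sizeof(wire) };
        uint8_t cmd = 0;

        extr_head(&p, &cmd, 1);
        printf("cmd=%u value=0x%04x\n", cmd, extr_head_u16(&p));
        return 0;
}

Returning the value directly keeps call sites compact; the trade-off, visible in the kernel helpers too, is that a truncated packet fails silently.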
diff --git a/include/net/erspan.h b/include/net/erspan.h index acdf6843095d..712ea1b1f4db 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -123,7 +123,7 @@ static inline void erspan_build_header(struct sk_buff *skb, __be32 id, u32 index, bool truncate, bool is_ipv4) { - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = (struct ethhdr *)skb->data; enum erspan_encap_type enc_type; struct erspan_base_hdr *ershdr; struct erspan_metadata *ersmd; @@ -190,7 +190,7 @@ static inline void erspan_build_header_v2(struct sk_buff *skb, __be32 id, u8 direction, u16 hwid, bool truncate, bool is_ipv4) { - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = (struct ethhdr *)skb->data; struct erspan_base_hdr *ershdr; struct erspan_metadata *md; struct qtag_prefix { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9dc1230d789c..8606c9113d3f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -332,6 +332,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dd238950df81..663b015dace5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -143,22 +143,22 @@ static inline void nft_data_debug(const struct nft_data *data) * struct nft_ctx - nf_tables rule/set context * * @net: net namespace - * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in * @nla: netlink attributes * @portid: netlink portID of the original message * @seq: netlink sequence number + * @family: protocol family * @report: notify via unicast netlink message */ struct nft_ctx { struct net *net; - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; const struct nlattr * const *nla; u32 portid; u32 seq; + u8 family; bool report; }; @@ -374,6 +374,7 @@ void nft_unregister_set(struct nft_set_type *type); * @list: table set list node * @bindings: list of set bindings * @name: name of the set + * @handle: unique handle of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) @@ -396,6 +397,7 @@ struct nft_set { struct list_head list; struct list_head bindings; char *name; + u64 handle; u32 ktype; u32 dtype; u32 objtype; @@ -946,9 +948,11 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @objects: stateful objects in the table * @flowtables: flow tables in the table * @hgenerator: handle generator state + * @handle: table handle * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask + * @afinfo: address family info * @name: name of the table */ struct nft_table { @@ -958,38 +962,14 @@ struct nft_table { struct list_head objects; struct list_head flowtables; u64 hgenerator; + u64 handle; u32 use; - u16 flags:14, + u16 family:6, + flags:8, genmask:2; char *name; }; -enum nft_af_flags { - NFT_AF_NEEDS_DEV = (1 << 0), -}; - -/** - * struct nft_af_info - nf_tables address family info - * - * @list: used internally - * @family: address family - * @nhooks: number of hooks in this family - * @owner: module owner - * @tables: used 
internally - * @flags: family flags - */ -struct nft_af_info { - struct list_head list; - int family; - unsigned int nhooks; - struct module *owner; - struct list_head tables; - u32 flags; -}; - -int nft_register_afinfo(struct net *, struct nft_af_info *); -void nft_unregister_afinfo(struct net *, struct nft_af_info *); - int nft_register_chain_type(const struct nf_chain_type *); void nft_unregister_chain_type(const struct nf_chain_type *); @@ -1007,9 +987,9 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * @name: name of this stateful object * @genmask: generation mask * @use: number of references to this stateful object - * @data: object data, layout depends on type + * @handle: unique object handle * @ops: object operations - * @data: pointer to object data + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; @@ -1017,6 +997,7 @@ struct nft_object { struct nft_table *table; u32 genmask:2, use:30; + u64 handle; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] @@ -1098,6 +1079,7 @@ void nft_unregister_obj(struct nft_object_type *obj_type); * @ops_len: number of hooks in array * @genmask: generation mask * @use: number of references to this flow table + * @handle: unique object handle * @data: rhashtable and garbage collector * @ops: array of hooks */ @@ -1110,6 +1092,7 @@ struct nft_flowtable { int ops_len; u32 genmask:2, use:30; + u64 handle; /* runtime data below here */ struct nf_hook_ops *ops ____cacheline_aligned; struct nf_flowtable data; @@ -1154,9 +1137,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, void nft_trace_notify(struct nft_traceinfo *info); -#define MODULE_ALIAS_NFT_FAMILY(family) \ - MODULE_ALIAS("nft-afinfo-" __stringify(family)) - #define MODULE_ALIAS_NFT_CHAIN(family, name) \ MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 4109b5f3010f..48134353411d 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,14 +7,8 @@ struct nft_af_info; struct netns_nftables { - struct list_head af_info; + struct list_head tables; struct list_head commit_list; - struct nft_af_info *ipv4; - struct nft_af_info *ipv6; - struct nft_af_info *inet; - struct nft_af_info *arp; - struct nft_af_info *bridge; - struct nft_af_info *netdev; unsigned int base_seq; u8 gencursor; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 789d818c4a61..fa2f6fb14093 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -376,7 +376,8 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr); + struct tcf_exts *exts, bool ovr, + struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); @@ -534,7 +535,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) { switch (layer) { case TCF_LAYER_LINK: - return skb->data; + return skb_mac_header(skb); case TCF_LAYER_NETWORK: return skb_network_header(skb); case TCF_LAYER_TRANSPORT: @@ -556,13 +557,16 @@ static inline int tcf_valid_offset(const struct sk_buff *skb, #include <net/net_namespace.h> static inline int -tcf_change_indev(struct net *net, struct nlattr *indev_tlv) 
+tcf_change_indev(struct net *net, struct nlattr *indev_tlv, + struct netlink_ext_ack *extack) { char indev[IFNAMSIZ]; struct net_device *dev; - if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) + if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) { + NL_SET_ERR_MSG(extack, "Interface name too long"); return -EINVAL; + } dev = __dev_get_by_name(net, indev); if (!dev) return -ENODEV; @@ -598,17 +602,9 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; + struct netlink_ext_ack *extack; }; -static inline void -tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, - const struct tcf_proto *tp) -{ - cls_common->chain_index = tp->chain->index; - cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; -} - struct tc_cls_u32_knode { struct tcf_exts *exts; struct tc_u32_sel *sel; @@ -649,6 +645,17 @@ static inline bool tc_can_offload(const struct net_device *dev) return dev->features & NETIF_F_HW_TC; } +static inline bool tc_can_offload_extack(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + bool can = tc_can_offload(dev); + + if (!can) + NL_SET_ERR_MSG(extack, "TC offload is disabled on net device"); + + return can; +} + static inline bool tc_skip_hw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; @@ -676,6 +683,18 @@ static inline bool tc_in_hw(u32 flags) return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false; } +static inline void +tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, + const struct tcf_proto *tp, u32 flags, + struct netlink_ext_ack *extack) +{ + cls_common->chain_index = tp->chain->index; + cls_common->protocol = tp->protocol; + cls_common->prio = tp->prio; + if (tc_skip_sw(flags)) + cls_common->extack = extack; +} + enum tc_fl_command { TC_CLSFLOWER_REPLACE, TC_CLSFLOWER_DESTROY, @@ -718,7 +737,6 @@ struct tc_cls_bpf_offload { struct bpf_prog *oldprog; const char *name; bool exts_integrated; - u32 gen_flags; }; struct tc_mqprio_qopt_offload { diff --git a/include/net/route.h b/include/net/route.h index d538e6db1afe..1eb9ce470e25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); -int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); +int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index cfc19d0ba2ad..eac43e8ca96d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -233,14 +233,18 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - void (*destroy)(struct tcf_proto*); + void (*destroy)(struct tcf_proto *tp, + struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool); - int (*delete)(struct tcf_proto*, void *, bool*); + void **, bool, + struct netlink_ext_ack *); + int (*delete)(struct tcf_proto *tp, void *arg, + bool *last, + struct netlink_ext_ack *); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); void (*bind_class)(void *, u32, unsigned long); diff --git a/include/net/tc_act/tc_csum.h 
b/include/net/tc_act/tc_csum.h index 781f3433a0be..9470fd7e4350 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -6,10 +6,16 @@ #include <net/act_api.h> #include <linux/tc_act/tc_csum.h> +struct tcf_csum_params { + int action; + u32 update_flags; + struct rcu_head rcu; +}; + struct tcf_csum { struct tc_action common; - u32 update_flags; + struct tcf_csum_params __rcu *params; }; #define to_tcf_csum(a) ((struct tcf_csum *)a) @@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a) static inline u32 tcf_csum_update_flags(const struct tc_action *a) { - return to_tcf_csum(a)->update_flags; + u32 update_flags; + + rcu_read_lock(); + update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags; + rcu_read_unlock(); + + return update_flags; } #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6939e69d3c37..5a1d26a18599 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -953,6 +953,7 @@ struct rate_sample { u32 prior_in_flight; /* in flight before this ACK */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ }; struct tcp_congestion_ops { diff --git a/include/net/wext.h b/include/net/wext.h index e51f067fdb3a..aa192a670304 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -7,7 +7,7 @@ struct net; #ifdef CONFIG_WEXT_CORE -int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg); int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg); @@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, struct iw_statistics *get_wireless_stats(struct net_device *dev); int call_commit_handler(struct net_device *dev); #else -static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, +static inline int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { return -EINVAL; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7c2259e8bc54..406c19d6016b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -17,7 +17,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ @@ -938,6 +938,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h index 18be90725ab0..ee97668bdadb 100644 --- a/include/uapi/linux/bpf_common.h +++ b/include/uapi/linux/bpf_common.h @@ -15,9 +15,10 @@ /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f8f04fed6186..8616131e2c61 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -161,6 +161,8 @@ enum { 
IFLA_EVENT, IFLA_NEW_NETNSID, IFLA_IF_NETNSID, + IFLA_CARRIER_UP_COUNT, + IFLA_CARRIER_DOWN_COUNT, __IFLA_MAX }; diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 2e522835a4af..98e4d5d7c45c 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -18,7 +18,7 @@ #define MACSEC_GENL_NAME "macsec" #define MACSEC_GENL_VERSION 1 -#define MACSEC_MAX_KEY_LEN 256 +#define MACSEC_MAX_KEY_LEN 128 #define MACSEC_KEYID_LEN 16 @@ -26,9 +26,9 @@ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL -#define MACSEC_DEFAULT_CIPHER_ID MACSEC_CIPHER_ID_GCM_AES_128 /* deprecated cipher ID for GCM-AES-128 */ -#define MACSEC_DEFAULT_CIPHER_ALT 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ALT MACSEC_CIPHER_ID_GCM_AES_128 #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 496e59a2738b..8fb90a0819c3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -932,6 +932,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 +#define KVM_CAP_S390_BPB 152 #ifdef KVM_CAP_IRQ_ROUTING @@ -1261,6 +1263,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 71e62795104d..7d570c7bd117 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -97,7 +97,7 @@ enum { L2TP_ATTR_OFFSET, /* u16 (not used) */ L2TP_ATTR_DATA_SEQ, /* u16 */ L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */ - L2TP_ATTR_L2SPEC_LEN, /* u8, enum l2tp_l2spec_type */ + L2TP_ATTR_L2SPEC_LEN, /* u8 (not used) */ L2TP_ATTR_PROTO_VERSION, /* u8 */ L2TP_ATTR_IFNAME, /* string */ L2TP_ATTR_CONN_ID, /* u32 */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 53e8dd2a3a03..66dceee0ae30 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -174,6 +174,8 @@ enum nft_table_attributes { NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, + NFTA_TABLE_HANDLE, + NFTA_TABLE_PAD, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) @@ -317,6 +319,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) * @NFTA_SET_USERDATA: user data (NLA_BINARY) * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) + * @NFTA_SET_HANDLE: set handle (NLA_U64) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -335,6 +338,7 @@ enum nft_set_attributes { NFTA_SET_USERDATA, NFTA_SET_PAD, NFTA_SET_OBJ_TYPE, + NFTA_SET_HANDLE, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes { * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + * @NFTA_OBJ_HANDLE: object handle (NLA_U64) */ enum 
nft_object_attributes { NFTA_OBJ_UNSPEC, @@ -1322,6 +1327,8 @@ enum nft_object_attributes { NFTA_OBJ_TYPE, NFTA_OBJ_DATA, NFTA_OBJ_USE, + NFTA_OBJ_HANDLE, + NFTA_OBJ_PAD, __NFTA_OBJ_MAX }; #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) @@ -1333,6 +1340,7 @@ enum nft_object_attributes { * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) */ enum nft_flowtable_attributes { NFTA_FLOWTABLE_UNSPEC, @@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes { NFTA_FLOWTABLE_NAME, NFTA_FLOWTABLE_HOOK, NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, __NFTA_FLOWTABLE_MAX }; #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index e6b1a84f5dd3..c3b060775e13 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -57,6 +57,7 @@ enum nf_ip_hook_priorities { NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_RAW_BEFORE_DEFRAG = -450, NF_IP_PRI_CONNTRACK_DEFRAG = -400, NF_IP_PRI_RAW = -300, NF_IP_PRI_SELINUX_FIRST = -225, diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index 2f9724611cc2..dc624fd24d25 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -62,6 +62,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_FIRST = INT_MIN, + NF_IP6_PRI_RAW_BEFORE_DEFRAG = -450, NF_IP6_PRI_CONNTRACK_DEFRAG = -400, NF_IP6_PRI_RAW = -300, NF_IP6_PRI_SELINUX_FIRST = -225, diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h new file mode 100644 index 000000000000..f3cc0ef514a7 --- /dev/null +++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _IP6T_SRH_H +#define _IP6T_SRH_H + +#include <linux/types.h> +#include <linux/netfilter.h> + +/* Values for "mt_flags" field in struct ip6t_srh */ +#define IP6T_SRH_NEXTHDR 0x0001 +#define IP6T_SRH_LEN_EQ 0x0002 +#define IP6T_SRH_LEN_GT 0x0004 +#define IP6T_SRH_LEN_LT 0x0008 +#define IP6T_SRH_SEGS_EQ 0x0010 +#define IP6T_SRH_SEGS_GT 0x0020 +#define IP6T_SRH_SEGS_LT 0x0040 +#define IP6T_SRH_LAST_EQ 0x0080 +#define IP6T_SRH_LAST_GT 0x0100 +#define IP6T_SRH_LAST_LT 0x0200 +#define IP6T_SRH_TAG 0x0400 +#define IP6T_SRH_MASK 0x07FF + +/* Values for "mt_invflags" field in struct ip6t_srh */ +#define IP6T_SRH_INV_NEXTHDR 0x0001 +#define IP6T_SRH_INV_LEN_EQ 0x0002 +#define IP6T_SRH_INV_LEN_GT 0x0004 +#define IP6T_SRH_INV_LEN_LT 0x0008 +#define IP6T_SRH_INV_SEGS_EQ 0x0010 +#define IP6T_SRH_INV_SEGS_GT 0x0020 +#define IP6T_SRH_INV_SEGS_LT 0x0040 +#define IP6T_SRH_INV_LAST_EQ 0x0080 +#define IP6T_SRH_INV_LAST_GT 0x0100 +#define IP6T_SRH_INV_LAST_LT 0x0200 +#define IP6T_SRH_INV_TAG 0x0400 +#define IP6T_SRH_INV_MASK 0x07FF + +/** + * struct ip6t_srh - SRH match options + * @ next_hdr: Next header field of SRH + * @ hdr_len: Extension header length field of SRH + * @ segs_left: Segments left field of SRH + * @ last_entry: Last entry field of SRH + * @ tag: Tag field of SRH + * @ mt_flags: match options + * @ mt_invflags: Invert the sense of match options + */ + +struct ip6t_srh { + __u8 next_hdr; + __u8 hdr_len; + __u8 segs_left; + __u8 last_entry; + __u16 tag; + __u16 mt_flags; + __u16 mt_invflags; +}; + +#endif /*_IP6T_SRH_H*/ diff --git a/kernel/bpf/arraymap.c 
b/kernel/bpf/arraymap.c index ab94d304a634..b1f66480135b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -49,27 +49,35 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) } /* Called from syscall */ -static struct bpf_map *array_map_alloc(union bpf_attr *attr) +static int array_map_alloc_check(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); - u32 elem_size, index_mask, max_entries; - bool unpriv = !capable(CAP_SYS_ADMIN); - struct bpf_array *array; - u64 array_size, mask64; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size == 0 || attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || (percpu && numa_node != NUMA_NO_NODE)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. */ - return ERR_PTR(-E2BIG); + return -E2BIG; + + return 0; +} + +static struct bpf_map *array_map_alloc(union bpf_attr *attr) +{ + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + int numa_node = bpf_map_attr_numa_node(attr); + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); + struct bpf_array *array; + u64 array_size, mask64; elem_size = round_up(attr->value_size, 8); @@ -112,12 +120,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ - array->map.map_type = attr->map_type; - array->map.key_size = attr->key_size; - array->map.value_size = attr->value_size; - array->map.max_entries = attr->max_entries; - array->map.map_flags = attr->map_flags; - array->map.numa_node = numa_node; + bpf_map_init_from_attr(&array->map, attr); array->elem_size = elem_size; if (!percpu) @@ -327,6 +330,7 @@ static void array_map_free(struct bpf_map *map) } const struct bpf_map_ops array_map_ops = { + .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, @@ -337,6 +341,7 @@ const struct bpf_map_ops array_map_ops = { }; const struct bpf_map_ops percpu_array_map_ops = { + .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, @@ -345,12 +350,12 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) +static int fd_array_map_alloc_check(union bpf_attr *attr) { /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) - return ERR_PTR(-EINVAL); - return array_map_alloc(attr); + return -EINVAL; + return array_map_alloc_check(attr); } static void fd_array_map_free(struct bpf_map *map) @@ -474,7 +479,8 @@ void bpf_fd_array_map_clear(struct bpf_map *map) } const struct bpf_map_ops prog_array_map_ops = { - .map_alloc = fd_array_map_alloc, + .map_alloc_check = fd_array_map_alloc_check, + .map_alloc = array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, @@ -561,7 +567,8 @@ static void perf_event_fd_array_release(struct bpf_map *map, } const struct bpf_map_ops perf_event_array_map_ops = { - .map_alloc = fd_array_map_alloc, + .map_alloc_check = fd_array_map_alloc_check, + .map_alloc = array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = 
array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, @@ -592,7 +599,8 @@ static void cgroup_fd_array_free(struct bpf_map *map) } const struct bpf_map_ops cgroup_array_map_ops = { - .map_alloc = fd_array_map_alloc, + .map_alloc_check = fd_array_map_alloc_check, + .map_alloc = array_map_alloc, .map_free = cgroup_fd_array_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, @@ -610,7 +618,7 @@ static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) if (IS_ERR(inner_map_meta)) return inner_map_meta; - map = fd_array_map_alloc(attr); + map = array_map_alloc(attr); if (IS_ERR(map)) { bpf_map_meta_free(inner_map_meta); return map; @@ -673,6 +681,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, } const struct bpf_map_ops array_of_maps_map_ops = { + .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, .map_free = array_of_map_free, .map_get_next_key = array_map_get_next_key, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 25e723b0dfd4..3aa0658add76 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -300,6 +300,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +/* All BPF JIT sysctl knobs here. */ +int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); +int bpf_jit_harden __read_mostly; +int bpf_jit_kallsyms __read_mostly; + static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, unsigned long *symbol_start, @@ -381,8 +386,6 @@ static DEFINE_SPINLOCK(bpf_lock); static LIST_HEAD(bpf_kallsyms); static struct latch_tree_root bpf_tree __cacheline_aligned; -int bpf_jit_kallsyms __read_mostly; - static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) { WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); @@ -563,8 +566,6 @@ void __weak bpf_jit_free(struct bpf_prog *fp) bpf_prog_unlock_free(fp); } -int bpf_jit_harden __read_mostly; - static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, struct bpf_insn *to_buff) @@ -970,7 +971,7 @@ select_insn: DST = tmp; CONT; ALU_MOD_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; DST = do_div(tmp, (u32) SRC); @@ -989,7 +990,7 @@ select_insn: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; do_div(tmp, (u32) SRC); @@ -1379,9 +1380,13 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) } #else -static unsigned int __bpf_prog_ret0(const void *ctx, - const struct bpf_insn *insn) +static unsigned int __bpf_prog_ret0_warn(const void *ctx, + const struct bpf_insn *insn) { + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON + * is not working properly, so warn about it! 
+ */ + WARN_ON_ONCE(1); return 0; } #endif @@ -1441,7 +1446,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; #else - fp->bpf_func = __bpf_prog_ret0; + fp->bpf_func = __bpf_prog_ret0_warn; #endif /* eBPF JITs can rewrite the program in case constant diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 584e02227671..d7ea96218516 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -591,9 +591,100 @@ unlock: raw_spin_unlock(&trie->lock); } -static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) +static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) { - return -ENOTSUPP; + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; + struct lpm_trie_node *node, *next_node = NULL, *parent; + struct lpm_trie_node **node_stack = NULL; + struct lpm_trie_node __rcu **root; + int err = 0, stack_ptr = -1; + unsigned int next_bit; + size_t matchlen; + + /* The get_next_key follows postorder. For the 4 node example in + * the top of this file, the trie_get_next_key() returns the following + * one after another: + * 192.168.0.0/24 + * 192.168.1.0/24 + * 192.168.128.0/24 + * 192.168.0.0/16 + * + * The idea is to return more specific keys before less specific ones. + */ + + /* Empty trie */ + if (!rcu_dereference(trie->root)) + return -ENOENT; + + /* For invalid key, find the leftmost node in the trie */ + if (!key || key->prefixlen > trie->max_prefixlen) { + root = &trie->root; + goto find_leftmost; + } + + node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *), + GFP_USER | __GFP_NOWARN); + if (!node_stack) + return -ENOMEM; + + /* Try to find the exact node for the given key */ + for (node = rcu_dereference(trie->root); node;) { + node_stack[++stack_ptr] = node; + matchlen = longest_prefix_match(trie, node, key); + if (node->prefixlen != matchlen || + node->prefixlen == key->prefixlen) + break; + + next_bit = extract_bit(key->data, node->prefixlen); + node = rcu_dereference(node->child[next_bit]); + } + if (!node || node->prefixlen != key->prefixlen || + (node->flags & LPM_TREE_NODE_FLAG_IM)) { + root = &trie->root; + goto find_leftmost; + } + + /* The node with the exactly-matching key has been found, + * find the first node in postorder after the matched node. + */ + node = node_stack[stack_ptr]; + while (stack_ptr > 0) { + parent = node_stack[stack_ptr - 1]; + if (rcu_dereference(parent->child[0]) == node && + rcu_dereference(parent->child[1])) { + root = &parent->child[1]; + goto find_leftmost; + } + if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { + next_node = parent; + goto do_copy; + } + + node = parent; + stack_ptr--; + } + + /* did not find anything */ + err = -ENOENT; + goto free_stack; + +find_leftmost: + /* Find the leftmost non-intermediate node, all intermediate nodes + * have exact two children, so this function will never return NULL. 
+ */ + for (node = rcu_dereference(*root); node;) { + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + next_node = node; + node = rcu_dereference(node->child[0]); + } +do_copy: + next_key->prefixlen = next_node->prefixlen; + memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), + next_node->data, trie->data_size); +free_stack: + kfree(node_stack); + return err; } const struct bpf_map_ops trie_map_ops = { diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index a88cebf368bf..c9401075b58c 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -230,9 +230,12 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info, .prog = prog, .info = info, }; + struct bpf_prog_aux *aux = prog->aux; struct inode *ns_inode; struct path ns_path; + char __user *uinsns; void *res; + u32 ulen; res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); if (IS_ERR(res)) { @@ -241,6 +244,26 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info, return PTR_ERR(res); } + down_read(&bpf_devs_lock); + + if (!aux->offload) { + up_read(&bpf_devs_lock); + return -ENODEV; + } + + ulen = info->jited_prog_len; + info->jited_prog_len = aux->offload->jited_len; + if (info->jited_prog_len && ulen) { + uinsns = u64_to_user_ptr(info->jited_prog_insns); + ulen = min_t(u32, info->jited_prog_len, ulen); + if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { + up_read(&bpf_devs_lock); + return -EFAULT; + } + } + + up_read(&bpf_devs_lock); + ns_inode = ns_path.dentry->d_inode; info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); info->netns_ino = ns_inode->i_ino; @@ -276,7 +299,8 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (attr->map_type != BPF_MAP_TYPE_HASH) + if (attr->map_type != BPF_MAP_TYPE_ARRAY && + attr->map_type != BPF_MAP_TYPE_HASH) return ERR_PTR(-EINVAL); offmap = kzalloc(sizeof(*offmap), GFP_USER); @@ -389,6 +413,61 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key) return ret; } +struct ns_get_path_bpf_map_args { + struct bpf_offloaded_map *offmap; + struct bpf_map_info *info; +}; + +static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data) { + struct ns_get_path_bpf_map_args *args = private_data; + struct ns_common *ns; + struct net *net; + + rtnl_lock(); + down_read(&bpf_devs_lock); + + if (args->offmap->netdev) { + args->info->ifindex = args->offmap->netdev->ifindex; + net = dev_net(args->offmap->netdev); + get_net(net); + ns = &net->ns; + } else { + args->info->ifindex = 0; + ns = NULL; + } + + up_read(&bpf_devs_lock); + rtnl_unlock(); + + return ns; +} + +int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ + struct ns_get_path_bpf_map_args args = { + .offmap = map_to_offmap(map), + .info = info, + }; + struct inode *ns_inode; + struct path ns_path; + void *res; + + res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args); + if (IS_ERR(res)) { + if (!info->ifindex) + return -ENODEV; + return PTR_ERR(res); + } + + ns_inode = ns_path.dentry->d_inode; + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); + info->netns_ino = ns_inode->i_ino; + path_put(&ns_path); + + return 0; +} + bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map) { struct bpf_offloaded_map *offmap; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c691b9e972e3..5bdb0cc84ad2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1504,6 +1504,8 @@ static int bpf_prog_test_run(const
union bpf_attr *attr, struct bpf_prog *prog; int ret = -ENOTSUPP; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; if (CHECK_ATTR(BPF_PROG_TEST_RUN)) return -EINVAL; @@ -1724,19 +1726,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, goto done; } - ulen = info.jited_prog_len; - info.jited_prog_len = prog->jited_len; - if (info.jited_prog_len && ulen) { - if (bpf_dump_raw_ok()) { - uinsns = u64_to_user_ptr(info.jited_prog_insns); - ulen = min_t(u32, info.jited_prog_len, ulen); - if (copy_to_user(uinsns, prog->bpf_func, ulen)) - return -EFAULT; - } else { - info.jited_prog_insns = 0; - } - } - ulen = info.xlated_prog_len; info.xlated_prog_len = bpf_prog_insn_size(prog); if (info.xlated_prog_len && ulen) { @@ -1762,6 +1751,24 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, err = bpf_prog_offload_info_fill(&info, prog); if (err) return err; + goto done; + } + + /* NOTE: the following code is supposed to be skipped for offload. + * bpf_prog_offload_info_fill() is the place to fill similar fields + * for offload. + */ + ulen = info.jited_prog_len; + info.jited_prog_len = prog->jited_len; + if (info.jited_prog_len && ulen) { + if (bpf_dump_raw_ok()) { + uinsns = u64_to_user_ptr(info.jited_prog_insns); + ulen = min_t(u32, info.jited_prog_len, ulen); + if (copy_to_user(uinsns, prog->bpf_func, ulen)) + return -EFAULT; + } else { + info.jited_prog_insns = 0; + } } done: @@ -1794,6 +1801,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, info.map_flags = map->map_flags; memcpy(info.name, map->name, sizeof(map->name)); + if (bpf_map_is_dev_bound(map)) { + err = bpf_map_offload_info_fill(&info, map); + if (err) + return err; + } + if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) return -EFAULT; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2e7a43edf264..dfb138b46488 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1349,6 +1349,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); } +static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = cur_regs(env) + regno; + + return reg->type == PTR_TO_CTX; +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -1728,6 +1735,12 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins return -EACCES; } + if (is_ctx_reg(env, insn->dst_reg)) { + verbose(env, "BPF_XADD stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); @@ -1837,6 +1850,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, } } +static bool arg_type_is_mem_ptr(enum bpf_arg_type type) +{ + return type == ARG_PTR_TO_MEM || + type == ARG_PTR_TO_MEM_OR_NULL || + type == ARG_PTR_TO_UNINIT_MEM; +} + +static bool arg_type_is_mem_size(enum bpf_arg_type type) +{ + return type == ARG_CONST_SIZE || + type == ARG_CONST_SIZE_OR_ZERO; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) @@ -1886,9 +1912,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_CTX; if (type != expected_type) goto err_type; - } else if (arg_type == 
ARG_PTR_TO_MEM || - arg_type == ARG_PTR_TO_MEM_OR_NULL || - arg_type == ARG_PTR_TO_UNINIT_MEM) { + } else if (arg_type_is_mem_ptr(arg_type)) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be * passed in as argument, it's a SCALAR_VALUE type. Final test @@ -1949,25 +1973,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_stack_boundary(env, regno, meta->map_ptr->value_size, false, NULL); - } else if (arg_type == ARG_CONST_SIZE || - arg_type == ARG_CONST_SIZE_OR_ZERO) { + } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); - /* bpf_xxx(..., buf, len) call will access 'len' bytes - * from stack pointer 'buf'. Check it - * note: regno == len, regno - 1 == buf - */ - if (regno == 0) { - /* kernel subsystem misconfigured verifier */ - verbose(env, - "ARG_CONST_SIZE cannot be first argument\n"); - return -EACCES; - } - /* The register is SCALAR_VALUE; the access check * happens using its boundaries. */ - if (!tnum_is_const(reg->var_off)) /* For unprivileged variable accesses, disable raw * mode so that the program is required to @@ -2111,7 +2122,7 @@ error: return -EINVAL; } -static int check_raw_mode(const struct bpf_func_proto *fn) +static bool check_raw_mode_ok(const struct bpf_func_proto *fn) { int count = 0; @@ -2126,7 +2137,44 @@ static int check_raw_mode(const struct bpf_func_proto *fn) if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) count++; - return count > 1 ? -EINVAL : 0; + /* We only support one arg being in raw mode at the moment, + * which is sufficient for the helper functions we have + * right now. + */ + return count <= 1; +} + +static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, + enum bpf_arg_type arg_next) +{ + return (arg_type_is_mem_ptr(arg_curr) && + !arg_type_is_mem_size(arg_next)) || + (!arg_type_is_mem_ptr(arg_curr) && + arg_type_is_mem_size(arg_next)); +} + +static bool check_arg_pair_ok(const struct bpf_func_proto *fn) +{ + /* bpf_xxx(..., buf, len) call will access 'len' + * bytes from memory 'buf'. Both arg types need + * to be paired, so make sure there's no buggy + * helper function specification. + */ + if (arg_type_is_mem_size(fn->arg1_type) || + arg_type_is_mem_ptr(fn->arg5_type) || + check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || + check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || + check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || + check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) + return false; + + return true; +} + +static int check_func_proto(const struct bpf_func_proto *fn) +{ + return check_raw_mode_ok(fn) && + check_arg_pair_ok(fn) ? 0 : -EINVAL; } /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] @@ -2282,7 +2330,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn if (env->ops->get_func_proto) fn = env->ops->get_func_proto(func_id); - if (!fn) { verbose(env, "unknown func %s#%d\n", func_id_name(func_id), func_id); @@ -2306,10 +2353,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; - /* We only support one arg being in raw mode at the moment, which - * is sufficient for the helper functions we have right now. 
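The new check_args_pair_invalid()/check_arg_pair_ok() helpers above turn the rule quoted in the comment this hunk removes into a structural check: a helper may take an ARG_PTR_TO_MEM-style buffer only when the very next argument carries its size, and a size argument is only legal right after such a buffer. A standalone sketch of that pairing rule, with the arg-type enum cut down to three illustrative values rather than the kernel's full bpf_arg_type set:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative subset of the verifier's bpf_arg_type values. */
enum arg_type { ARG_ANYTHING, ARG_PTR_TO_MEM, ARG_CONST_SIZE };

static bool is_mem_ptr(enum arg_type t)  { return t == ARG_PTR_TO_MEM; }
static bool is_mem_size(enum arg_type t) { return t == ARG_CONST_SIZE; }

/* Mirrors check_args_pair_invalid(): a buffer must be directly followed
 * by its size, and a size may only directly follow a buffer.
 */
static bool pair_invalid(enum arg_type cur, enum arg_type next)
{
        return (is_mem_ptr(cur) && !is_mem_size(next)) ||
               (!is_mem_ptr(cur) && is_mem_size(next));
}

/* Mirrors check_arg_pair_ok() over a five-slot prototype. */
static bool proto_ok(const enum arg_type a[5])
{
        if (is_mem_size(a[0]) || is_mem_ptr(a[4])) /* no partner at the edges */
                return false;
        for (int i = 0; i < 4; i++)
                if (pair_invalid(a[i], a[i + 1]))
                        return false;
        return true;
}

int main(void)
{
        enum arg_type good[5] = { ARG_ANYTHING, ARG_PTR_TO_MEM,
                                  ARG_CONST_SIZE, ARG_ANYTHING, ARG_ANYTHING };
        enum arg_type bad[5]  = { ARG_ANYTHING, ARG_PTR_TO_MEM,
                                  ARG_ANYTHING, ARG_ANYTHING, ARG_ANYTHING };

        printf("good=%d bad=%d\n", proto_ok(good), proto_ok(bad)); /* 1 0 */
        return 0;
}

The edge checks mirror check_arg_pair_ok(): a size can never be the first argument and a buffer can never be the last, since neither could then form a pair.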
- */ - err = check_raw_mode(fn); + err = check_func_proto(fn); if (err) { verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id); @@ -2478,17 +2522,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg = &regs[dst]; - if (WARN_ON_ONCE(known && (smin_val != smax_val))) { - print_verifier_state(env, state); - verbose(env, - "verifier internal error: known but bad sbounds\n"); - return -EINVAL; - } - if (WARN_ON_ONCE(known && (umin_val != umax_val))) { - print_verifier_state(env, state); - verbose(env, - "verifier internal error: known but bad ubounds\n"); - return -EINVAL; + if ((known && (smin_val != smax_val || umin_val != umax_val)) || + smin_val > smax_val || umin_val > umax_val) { + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. + */ + __mark_reg_unknown(dst_reg); + return 0; } if (BPF_CLASS(insn->code) != BPF_ALU64) { @@ -2680,6 +2720,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, src_known = tnum_is_const(src_reg.var_off); dst_known = tnum_is_const(dst_reg->var_off); + if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || + smin_val > smax_val || umin_val > umax_val) { + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. + */ + __mark_reg_unknown(dst_reg); + return 0; + } + if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { __mark_reg_unknown(dst_reg); @@ -4661,6 +4710,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + if (is_ctx_reg(env, insn->dst_reg)) { + verbose(env, "BPF_ST stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, @@ -4779,7 +4834,8 @@ process_bpf_exit: insn_idx++; } - verbose(env, "processed %d insns, stack depth ", insn_processed); + verbose(env, "processed %d insns (limit %d), stack depth ", + insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); for (i = 0; i < env->subprog_cnt + 1; i++) { u32 depth = env->subprog_stack_depth[i]; @@ -5330,6 +5386,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) int i, cnt, delta = 0; for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || + insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { + /* due to JIT bugs clear upper 32-bits of src register + * before div/mod operation + */ + insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); + insn_buf[1] = *insn; + cnt = 2; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; if (insn->src_reg == BPF_PSEUDO_CALL) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2cf06c274e4c..7e4c44538119 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4447,6 +4447,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cgroup.threads", + .flags = CFTYPE_NS_DELEGATABLE, .release = cgroup_procs_release, .seq_start = cgroup_threads_start, .seq_next = cgroup_procs_next, diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 4a1c33416b6a..e2764d767f18 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting
for a statistic using its timestamps (@start), * accumulator (@total) and @count */ -static void delayacct_end(u64 *start, u64 *total, u32 *count) +static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count) { s64 ns = ktime_get_ns() - *start; unsigned long flags; if (ns > 0) { - spin_lock_irqsave(&current->delays->lock, flags); + spin_lock_irqsave(lock, flags); *total += ns; (*count)++; - spin_unlock_irqrestore(&current->delays->lock, flags); + spin_unlock_irqrestore(lock, flags); } } @@ -69,17 +69,25 @@ void __delayacct_blkio_start(void) current->delays->blkio_start = ktime_get_ns(); } -void __delayacct_blkio_end(void) +/* + * We cannot rely on the `current` macro, as we haven't yet switched back to + * the process being woken. + */ +void __delayacct_blkio_end(struct task_struct *p) { - if (current->delays->flags & DELAYACCT_PF_SWAPIN) - /* Swapin block I/O */ - delayacct_end(&current->delays->blkio_start, - &current->delays->swapin_delay, - &current->delays->swapin_count); - else /* Other block I/O */ - delayacct_end(&current->delays->blkio_start, - &current->delays->blkio_delay, - &current->delays->blkio_count); + struct task_delay_info *delays = p->delays; + u64 *total; + u32 *count; + + if (p->delays->flags & DELAYACCT_PF_SWAPIN) { + total = &delays->swapin_delay; + count = &delays->swapin_count; + } else { + total = &delays->blkio_delay; + count = &delays->blkio_count; + } + + delayacct_end(&delays->lock, &delays->blkio_start, total, count); } int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -153,8 +161,10 @@ void __delayacct_freepages_start(void) void __delayacct_freepages_end(void) { - delayacct_end(&current->delays->freepages_start, - &current->delays->freepages_delay, - &current->delays->freepages_count); + delayacct_end( + &current->delays->lock, + &current->delays->freepages_start, + &current->delays->freepages_delay, + &current->delays->freepages_count); } diff --git a/kernel/futex.c b/kernel/futex.c index 57d0b3657e16..8c5424dd5924 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1878,6 +1878,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_q *this, *next; DEFINE_WAKE_Q(wake_q); + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + /* * When PI not supported: return -ENOSYS if requeue_pi is true, * consequently the compiler knows requeue_pi is always false past @@ -2294,21 +2297,17 @@ static void unqueue_me_pi(struct futex_q *q) spin_unlock(q->lock_ptr); } -/* - * Fixup the pi_state owner with the new owner. - * - * Must be called with hash bucket lock held and mm->sem held for non - * private futexes. - */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner) + struct task_struct *argowner) { - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; u32 uval, uninitialized_var(curval), newval; - struct task_struct *oldowner; + struct task_struct *oldowner, *newowner; + u32 newtid; int ret; + lockdep_assert_held(q->lock_ptr); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; @@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, newtid |= FUTEX_OWNER_DIED; /* - * We are here either because we stole the rtmutex from the - * previous highest priority waiter or we are the highest priority - * waiter but have failed to get the rtmutex the first time.
+ * We are here because either: + * + * - we stole the lock and pi_state->owner needs updating to reflect + * that (@argowner == current), + * + * or: + * + * - someone stole our lock and we need to fix things to point to the + * new owner (@argowner == NULL). * - * We have to replace the newowner TID in the user space variable. + * Either way, we have to replace the TID in the user space variable. * This must be atomic as we have to preserve the owner died bit here. * * Note: We write the user space value _before_ changing the pi_state @@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * in the PID check in lookup_pi_state. */ retry: + if (!argowner) { + if (oldowner != current) { + /* + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. + */ + ret = 0; + goto out_unlock; + } + + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { + /* We got the lock after all, nothing to fix. */ + ret = 0; + goto out_unlock; + } + + /* + * Since we just failed the trylock; there must be an owner. + */ + newowner = rt_mutex_owner(&pi_state->pi_mutex); + BUG_ON(!newowner); + } else { + WARN_ON_ONCE(argowner != current); + if (oldowner == current) { + /* + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. + */ + ret = 0; + goto out_unlock; + } + newowner = argowner; + } + + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; @@ -2434,9 +2475,9 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * Got the lock. We might not be the anticipated owner if we * did a lock-steal - fix up the PI-state in that case: * - * We can safely read pi_state->owner without holding wait_lock - * because we now own the rt_mutex, only the owner will attempt - * to change it. + * Speculative pi_state->owner read (we don't hold wait_lock); + * since we own the lock pi_state->owner == current is the + * stable state, anything else needs more attention. */ if (q->pi_state->owner != current) ret = fixup_pi_state_owner(uaddr, q, current); @@ -2444,6 +2485,19 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) } /* + * If we didn't get the lock; check if anybody stole it from us. In + * that case, we need to fix up the uval to point to them instead of + * us, otherwise bad things happen. [10] + * + * Another speculative read; pi_state->owner == current is unstable + * but needs our attention. + */ + if (q->pi_state->owner == current) { + ret = fixup_pi_state_owner(uaddr, q, NULL); + goto out; + } + + /* * Paranoia check. If we did not take the lock, then we should not be * the owner of the rt_mutex. 
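A user-visible consequence of the futex_requeue() hunk above is that negative nr_wake/nr_requeue values are now rejected up front rather than flowing into signed arithmetic. A hedged userspace probe (kernels without this change behave differently; with it, the call below should fail with EINVAL):

#include <errno.h>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        uint32_t f1 = 0, f2 = 0;
        long ret;

        /* FUTEX_CMP_REQUEUE with nr_wake = 1 and nr_requeue = -1; per the
         * futex(2) convention, nr_requeue travels in the timeout argument
         * slot, and val3 = 0 matches the current value of f1.
         */
        ret = syscall(SYS_futex, &f1, FUTEX_CMP_REQUEUE, 1,
                      (unsigned long)-1 /* nr_requeue */, &f2, 0);
        printf("ret=%ld (%s)\n", ret, ret < 0 ? strerror(errno) : "ok");
        return 0;
}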
*/ diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 0ba0dd8863a7..5187dfe809ac 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -321,15 +321,23 @@ void irq_matrix_remove_reserved(struct irq_matrix *m) int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, bool reserved, unsigned int *mapped_cpu) { - unsigned int cpu; + unsigned int cpu, best_cpu, maxavl = 0; + struct cpumap *cm; + unsigned int bit; + best_cpu = UINT_MAX; for_each_cpu(cpu, msk) { - struct cpumap *cm = per_cpu_ptr(m->maps, cpu); - unsigned int bit; + cm = per_cpu_ptr(m->maps, cpu); - if (!cm->online) + if (!cm->online || cm->available <= maxavl) continue; + best_cpu = cpu; + maxavl = cm->available; + } + + if (maxavl) { + cm = per_cpu_ptr(m->maps, best_cpu); bit = matrix_alloc_area(m, cm, 1, false); if (bit < m->alloc_end) { cm->allocated++; @@ -338,8 +346,8 @@ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, m->global_available--; if (reserved) m->global_reserved--; - *mapped_cpu = cpu; - trace_irq_matrix_alloc(bit, cpu, m, cm); + *mapped_cpu = best_cpu; + trace_irq_matrix_alloc(bit, best_cpu, m, cm); return bit; } } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6f3dba6e4e9e..65cc0cb984e6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, return ret; } +static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) +{ + int ret = try_to_take_rt_mutex(lock, current, NULL); + + /* + * try_to_take_rt_mutex() sets the lock waiters bit + * unconditionally. Clean this up. + */ + fixup_rt_mutex_waiters(lock); + + return ret; +} + /* * Slow path try-lock function: */ @@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) */ raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = try_to_take_rt_mutex(lock, current, NULL); - - /* - * try_to_take_rt_mutex() sets the lock waiters bit - * unconditionally. Clean this up. 
- */ - fixup_rt_mutex_waiters(lock); + ret = __rt_mutex_slowtrylock(lock); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); @@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) return rt_mutex_slowtrylock(lock); } +int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) +{ + return __rt_mutex_slowtrylock(lock); +} + /** * rt_mutex_timed_lock - lock a rt_mutex interruptible * the timeout structure is provided diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 124e98ca0b17..68686b3ec3c1 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter); extern int rt_mutex_futex_trylock(struct rt_mutex *l); +extern int __rt_mutex_futex_trylock(struct rt_mutex *l); extern void rt_mutex_futex_unlock(struct rt_mutex *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 644fa2e3d993..a7bf32aabfda 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2056,7 +2056,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) p->state = TASK_WAKING; if (p->in_iowait) { - delayacct_blkio_end(); + delayacct_blkio_end(p); atomic_dec(&task_rq(p)->nr_iowait); } @@ -2069,7 +2069,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) #else /* CONFIG_SMP */ if (p->in_iowait) { - delayacct_blkio_end(); + delayacct_blkio_end(p); atomic_dec(&task_rq(p)->nr_iowait); } @@ -2122,7 +2122,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) if (!task_on_rq_queued(p)) { if (p->in_iowait) { - delayacct_blkio_end(); + delayacct_blkio_end(p); atomic_dec(&rq->nr_iowait); } ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 89a9e1b4264a..0bcf00e3ce48 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1696,7 +1696,7 @@ void run_local_timers(void) hrtimer_run_queues(); /* Raise the softirq only if required. */ if (time_before(jiffies, base->clk)) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) return; /* CPU is awake, so check the deferrable base. */ base++; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f274468cbc45..fc2838ac8b78 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -245,7 +245,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, */ #define __BPF_TP_EMIT() __BPF_ARG3_TP() #define __BPF_TP(...) \ - __trace_printk(1 /* Fake ip will not be printed. */, \ + __trace_printk(0 /* Fake ip */, \ fmt, ##__VA_ARGS__) #define __BPF_ARG1_TP(...) \ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ccdf3664e4a9..554b517c61a0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1119,15 +1119,11 @@ static struct ftrace_ops global_ops = { }; /* - * This is used by __kernel_text_address() to return true if the - * address is on a dynamically allocated trampoline that would - * not return true for either core_kernel_text() or - * is_module_text_address(). + * Used by the stack unwinder to know about dynamic ftrace trampolines.
*/ -bool is_ftrace_trampoline(unsigned long addr) +struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) { - struct ftrace_ops *op; - bool ret = false; + struct ftrace_ops *op = NULL; /* * Some of the ops may be dynamically allocated, @@ -1144,15 +1140,24 @@ bool is_ftrace_trampoline(unsigned long addr) if (op->trampoline && op->trampoline_size) if (addr >= op->trampoline && addr < op->trampoline + op->trampoline_size) { - ret = true; - goto out; + preempt_enable_notrace(); + return op; } } while_for_each_ftrace_op(op); - - out: preempt_enable_notrace(); - return ret; + return NULL; +} + +/* + * This is used by __kernel_text_address() to return true if the + * address is on a dynamically allocated trampoline that would + * not return true for either core_kernel_text() or + * is_module_text_address(). + */ +bool is_ftrace_trampoline(unsigned long addr) +{ + return ftrace_ops_trampoline(addr) != NULL; } struct ftrace_page { diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0cddf60186da..5af2842dea96 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2579,8 +2579,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) bit = RB_CTX_NORMAL; else bit = pc & NMI_MASK ? RB_CTX_NMI : - pc & HARDIRQ_MASK ? RB_CTX_IRQ : - pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ; + pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; if (unlikely(val & (1 << bit))) return 1; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a8d8a294345..8e3f20a18a06 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2374,6 +2374,15 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); +/* + * Skip 3: + * + * trace_buffer_unlock_commit_regs() + * trace_event_buffer_commit() + * trace_event_raw_event_xxx() +*/ +# define STACK_SKIP 3 + void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct ring_buffer *buffer, struct ring_buffer_event *event, @@ -2383,16 +2392,12 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, __buffer_unlock_commit(buffer, event); /* - * If regs is not set, then skip the following callers: - * trace_buffer_unlock_commit_regs - * event_trigger_unlock_commit - * trace_event_buffer_commit - * trace_event_raw_event_sched_switch + * If regs is not set, then skip the necessary functions. * Note, we can still get here via blktrace, wakeup tracer * and mmiotrace, but that's ok if they lose a function or - * two. They are that meaningful. + * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs); + ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } @@ -2579,11 +2584,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, trace.skip = skip; /* - * Add two, for this function and the call to save_stack_trace() + * Add one, for this function and the call to save_stack_trace() * If regs is set, then these functions will not be in the way. */ +#ifndef CONFIG_UNWINDER_ORC if (!regs) - trace.skip += 2; + trace.skip++; +#endif /* * Since events can happen in NMIs there's no safe way to @@ -2711,11 +2718,10 @@ void trace_dump_stack(int skip) local_save_flags(flags); - /* - * Skip 3 more, seems to get us at the caller of - * this function. - */ - skip += 3; +#ifndef CONFIG_UNWINDER_ORC + /* Skip 1 to skip this function. 
*/ + skip++; +#endif __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, skip, preempt_count(), NULL); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ec0f9aa4e151..1b87157edbff 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2213,6 +2213,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; + bool first = false; int last_i; int i; @@ -2220,15 +2221,28 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { + first = true; last_i = 0; last_system = call->class->system; } + /* + * Since calls are grouped by systems, the likelihood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip searching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. + */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ - if (!last_i) + if (first) { last_i = i; + first = false; + } update_event_printk(call, map[i]); } } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f2ac9d44f6c4..87411482a46f 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1123,13 +1123,22 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } #endif /* CONFIG_TRACER_SNAPSHOT */ #ifdef CONFIG_STACKTRACE +#ifdef CONFIG_UNWINDER_ORC +/* Skip 2: + * event_triggers_post_call() + * trace_event_raw_event_xxx() + */ +# define STACK_SKIP 2 +#else /* - * Skip 3: + * Skip 4: * stacktrace_trigger() * event_triggers_post_call() + * trace_event_buffer_commit() * trace_event_raw_event_xxx() */ -#define STACK_SKIP 3 +#define STACK_SKIP 4 +#endif static void stacktrace_trigger(struct event_trigger_data *data, void *rec) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 27f7ad12c4b1..b611cd36e22d 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -154,6 +154,24 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, preempt_enable_notrace(); } +#ifdef CONFIG_UNWINDER_ORC +/* + * Skip 2: + * + * function_stack_trace_call() + * ftrace_call() + */ +#define STACK_SKIP 2 +#else +/* + * Skip 3: + * __trace_stack() + * function_stack_trace_call() + * ftrace_call() + */ +#define STACK_SKIP 3 +#endif + static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) @@ -180,15 +198,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, if (likely(disabled == 1)) { pc = preempt_count(); trace_function(tr, ip, parent_ip, flags, pc); - /* - * skip over 5 funcs: - * __ftrace_trace_stack, - * __trace_stack, - * function_stack_trace_call - * ftrace_list_func - * ftrace_call - */ - __trace_stack(tr, flags, 5, pc); + __trace_stack(tr, flags, STACK_SKIP, pc); } atomic_dec(&data->disabled); @@ -367,14 +377,27 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, tracer_tracing_off(tr); } +#ifdef CONFIG_UNWINDER_ORC /* - * Skip
4: + * Skip 3: + * + * function_trace_probe_call() + * ftrace_ops_assist_func() + * ftrace_call() + */ +#define FTRACE_STACK_SKIP 3 +#else +/* + * Skip 5: + * + * __trace_stack() * ftrace_stacktrace() * function_trace_probe_call() - * ftrace_ops_list_func() + * ftrace_ops_assist_func() * ftrace_call() */ -#define STACK_SKIP 4 +#define FTRACE_STACK_SKIP 5 +#endif static __always_inline void trace_stack(struct trace_array *tr) { @@ -384,7 +407,7 @@ static __always_inline void trace_stack(struct trace_array *tr) local_save_flags(flags); pc = preempt_count(); - __trace_stack(tr, flags, STACK_SKIP, pc); + __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); } static void diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 43d18cb46308..f699122dab32 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include <linux/moduleparam.h> #include <linux/uaccess.h> #include <linux/sched/isolation.h> +#include <linux/nmi.h> #include "workqueue_internal.h" @@ -4463,6 +4464,12 @@ void show_workqueue_state(void) if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } } @@ -4490,6 +4497,12 @@ void show_workqueue_state(void) pr_cont("\n"); next_pool: spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } rcu_read_unlock_sched(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f369889e521d..e3938e395cba 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6109,6 +6109,110 @@ static struct bpf_test tests[] = { { { ETH_HLEN, 42 } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, }, + /* Checking interpreter vs JIT wrt signed extended imms. 
*/ + { + "JNE signed compare, test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_REG(BPF_JNE, R2, R4, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 4", + .u.insns_int = { + BPF_LD_IMM64(R1, -17104896), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 5", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xfefb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 6", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x7efb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 7", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, 2), + }, + CLASSIC | FLAG_NO_DATA, + {}, + { { 0, 2 } }, + }, }; static struct net_device dev; diff --git a/mm/memory.c b/mm/memory.c index ca5674cbaff2..793004608332 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2857,8 +2857,11 @@ int do_swap_page(struct vm_fault *vmf) int ret = 0; bool vma_readahead = swap_use_vma_readahead(); - if (vma_readahead) + if (vma_readahead) { page = swap_readahead_detect(vmf, &swap_ra); + swapcache = page; + } + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) { if (page) put_page(page); @@ -2889,9 +2892,12 @@ int do_swap_page(struct vm_fault *vmf) delayacct_set_flag(DELAYACCT_PF_SWAPIN); - if (!page) + if (!page) { page = lookup_swap_cache(entry, vma_readahead ? 
vma : NULL, vmf->address); + swapcache = page; + } + if (!page) { struct swap_info_struct *si = swp_swap_info(entry); diff --git a/mm/page_owner.c b/mm/page_owner.c index 8592543a0f15..270a8219ccd0 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -616,7 +616,6 @@ static void init_early_allocated_pages(void) { pg_data_t *pgdat; - drain_all_pages(NULL); for_each_online_pgdat(pgdat) init_zones_in_node(pgdat); } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index d22b84310f6d..ae3c2a35d61b 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -30,10 +30,37 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw) return true; } +static inline bool pfn_in_hpage(struct page *hpage, unsigned long pfn) +{ + unsigned long hpage_pfn = page_to_pfn(hpage); + + /* THP can be referenced by any subpage */ + return pfn >= hpage_pfn && pfn - hpage_pfn < hpage_nr_pages(hpage); +} + +/** + * check_pte - check if @pvmw->page is mapped at the @pvmw->pte + * + * page_vma_mapped_walk() found a place where @pvmw->page is *potentially* + * mapped. check_pte() has to validate this. + * + * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary + * page. + * + * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration + * entry that points to @pvmw->page or any subpage in case of THP. + * + * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to + * @pvmw->page or any subpage in case of THP. + * + * Otherwise, return false. + * + */ static bool check_pte(struct page_vma_mapped_walk *pvmw) { + unsigned long pfn; + if (pvmw->flags & PVMW_MIGRATION) { -#ifdef CONFIG_MIGRATION swp_entry_t entry; if (!is_swap_pte(*pvmw->pte)) return false; @@ -41,38 +68,25 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) if (!is_migration_entry(entry)) return false; - if (migration_entry_to_page(entry) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (migration_entry_to_page(entry) < pvmw->page) - return false; -#else - WARN_ON_ONCE(1); -#endif - } else { - if (is_swap_pte(*pvmw->pte)) { - swp_entry_t entry; - entry = pte_to_swp_entry(*pvmw->pte); - if (is_device_private_entry(entry) && - device_private_entry_to_page(entry) == pvmw->page) - return true; - } + pfn = migration_entry_to_pfn(entry); + } else if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; - if (!pte_present(*pvmw->pte)) + /* Handle un-addressable ZONE_DEVICE memory */ + entry = pte_to_swp_entry(*pvmw->pte); + if (!is_device_private_entry(entry)) return false; - /* THP can be referenced by any subpage */ - if (pte_page(*pvmw->pte) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (pte_page(*pvmw->pte) < pvmw->page) + pfn = device_private_entry_to_pfn(entry); + } else { + if (!pte_present(*pvmw->pte)) return false; + + pfn = pte_pfn(*pvmw->pte); } - return true; + return pfn_in_hpage(pvmw->page, pfn); } /** diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 80559fd11b7e..8e13a64d8c99 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -760,7 +760,7 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst, static inline bool br_multicast_is_router(struct net_bridge *br) { - return 0; + return false; } static inline bool br_multicast_querier_exists(struct net_bridge *br, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 37817d25b63d..02c4b409d317 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2445,7 +2445,6 @@ 
static int __init ebtables_init(void) return ret; } - printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; } @@ -2453,7 +2452,6 @@ static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); - printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 86774b5c3b73..5160cf614176 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -42,40 +42,6 @@ nft_do_chain_bridge(void *priv, return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_bridge __read_mostly = { - .family = NFPROTO_BRIDGE, - .nhooks = NF_BR_NUMHOOKS, - .owner = THIS_MODULE, -}; - -static int nf_tables_bridge_init_net(struct net *net) -{ - net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.bridge == NULL) - return -ENOMEM; - - memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); - - if (nft_register_afinfo(net, net->nft.bridge) < 0) - goto err; - - return 0; -err: - kfree(net->nft.bridge); - return -ENOMEM; -} - -static void nf_tables_bridge_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.bridge); - kfree(net->nft.bridge); -} - -static struct pernet_operations nf_tables_bridge_net_ops = { - .init = nf_tables_bridge_init_net, - .exit = nf_tables_bridge_exit_net, -}; - static const struct nf_chain_type filter_bridge = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -97,27 +63,11 @@ static const struct nf_chain_type filter_bridge = { static int __init nf_tables_bridge_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_bridge); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) - goto err_register_subsys; - - return ret; - -err_register_subsys: - nft_unregister_chain_type(&filter_bridge); - - return ret; + return nft_register_chain_type(&filter_bridge); } static void __exit nf_tables_bridge_exit(void) { - unregister_pernet_subsys(&nf_tables_bridge_net_ops); nft_unregister_chain_type(&filter_bridge); } @@ -126,4 +76,4 @@ module_exit(nf_tables_bridge_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <[email protected]>"); -MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); +MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter"); diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 655ed7032150..a1e85f032108 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -352,15 +352,14 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) u8 cmdrsp; u8 cmd; int ret = -1; - u16 tmp16; u8 len; u8 param[255]; - u8 linkid; + u8 linkid = 0; struct cfctrl *cfctrl = container_obj(layer); struct cfctrl_request_info rsp, *req; - cfpkt_extr_head(pkt, &cmdrsp, 1); + cmdrsp = cfpkt_extr_head_u8(pkt); cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp) @@ -378,13 +377,12 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) u8 physlinkid; u8 prio; u8 tmp; - u32 tmp32; u8 *cp; int i; struct cfctrl_link_param linkparam; memset(&linkparam, 0, sizeof(linkparam)); - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); serv = tmp & CFCTRL_SRV_MASK; linkparam.linktype = serv; @@ -392,13 +390,13 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) servtype = tmp >> 4; linkparam.chtype = servtype; - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); physlinkid = tmp 
& 0x07; prio = tmp >> 3; linkparam.priority = prio; linkparam.phyid = physlinkid; - cfpkt_extr_head(pkt, &endpoint, 1); + endpoint = cfpkt_extr_head_u8(pkt); linkparam.endpoint = endpoint & 0x03; switch (serv) { @@ -407,45 +405,43 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_VIDEO: - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); linkparam.u.video.connid = tmp; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_DATAGRAM: - cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.datagram.connid = - le32_to_cpu(tmp32); + cfpkt_extr_head_u32(pkt); if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_RFM: /* Construct a frame, convert * DatagramConnectionID * to network format long and copy it out... */ - cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.rfm.connid = - le32_to_cpu(tmp32); + cfpkt_extr_head_u32(pkt); cp = (u8 *) linkparam.u.rfm.volume; - for (cfpkt_extr_head(pkt, &tmp, 1); + for (tmp = cfpkt_extr_head_u8(pkt); cfpkt_more(pkt) && tmp != '\0'; - cfpkt_extr_head(pkt, &tmp, 1)) + tmp = cfpkt_extr_head_u8(pkt)) *cp++ = tmp; *cp = '\0'; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_UTIL: @@ -454,13 +450,11 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) * to network format long and copy it out... */ /* Fifosize KB */ - cfpkt_extr_head(pkt, &tmp16, 2); linkparam.u.utility.fifosize_kb = - le16_to_cpu(tmp16); + cfpkt_extr_head_u16(pkt); /* Fifosize bufs */ - cfpkt_extr_head(pkt, &tmp16, 2); linkparam.u.utility.fifosize_bufs = - le16_to_cpu(tmp16); + cfpkt_extr_head_u16(pkt); /* name */ cp = (u8 *) linkparam.u.utility.name; caif_assert(sizeof(linkparam.u.utility.name) @@ -468,24 +462,24 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } /* Length */ - cfpkt_extr_head(pkt, &len, 1); + len = cfpkt_extr_head_u8(pkt); linkparam.u.utility.paramlen = len; /* Param Data */ cp = linkparam.u.utility.params; while (cfpkt_more(pkt) && len--) { - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); /* Length */ - cfpkt_extr_head(pkt, &len, 1); + len = cfpkt_extr_head_u8(pkt); /* Param Data */ cfpkt_extr_head(pkt, &param, len); break; @@ -522,7 +516,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) } break; case CFCTRL_CMD_LINK_DESTROY: - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); break; case CFCTRL_CMD_LINK_ERR: diff --git a/net/can/af_can.c b/net/can/af_can.c index f22b886ed081..6da324550eec 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN, - "PF_CAN: dropped non conform CAN skbuf: " -
"dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) - goto drop; + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); + kfree_skb(skb); + return NET_RX_DROP; + } can_receive(skb, dev); return NET_RX_SUCCESS; - -drop: - kfree_skb(skb); - return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, @@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN, - "PF_CAN: dropped non conform CAN FD skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) - goto drop; + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); + kfree_skb(skb); + return NET_RX_DROP; + } can_receive(skb, dev); return NET_RX_SUCCESS; - -drop: - kfree_skb(skb); - return NET_RX_DROP; } /* diff --git a/net/core/dev.c b/net/core/dev.c index 94435cd09072..4670ccabe23a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1694,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); /** * call_netdevice_notifiers_info - call all network notifier blocks * @val: value passed unmodified to notifier function - * @dev: net_device pointer passed unmodified to notifier function * @info: notifier information data * * Call all network notifier blocks. Parameters and return value @@ -3167,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, @@ -6425,6 +6435,7 @@ rollback: * netdev_upper_dev_link - Add a link to the upper device * @dev: device * @upper_dev: new upper device + * @extack: netlink extended ack * * Adds a link to device which is upper to this one. The caller must hold * the RTNL lock. On a failure a negative errno code is returned. @@ -6446,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @upper_dev: new upper device * @upper_priv: upper device private * @upper_info: upper info to be passed down via notifier + * @extack: netlink extended ack * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7e690d0ccd05..0ab1af04296c 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -18,26 +18,10 @@ * match. 
--pb */ -static int dev_ifname(struct net *net, struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq *ifr) { - struct ifreq ifr; - int error; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - ifr.ifr_name[IFNAMSIZ-1] = 0; - - error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); - if (error) - return error; - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; + ifr->ifr_name[IFNAMSIZ-1] = 0; + return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex); } static gifconf_func_t *gifconf_list[NPROTO]; @@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf); * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(struct net *net, char __user *arg) +int dev_ifconf(struct net *net, struct ifconf *ifc, int size) { - struct ifconf ifc; struct net_device *dev; char __user *pos; int len; @@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg) * Fetch the caller's info block. */ - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; + pos = ifc->ifc_buf; + len = ifc->ifc_len; /* * Loop over the interfaces, and write an info block for each. @@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg) if (gifconf_list[i]) { int done; if (!pos) - done = gifconf_list[i](dev, NULL, 0); + done = gifconf_list[i](dev, NULL, 0, size); else done = gifconf_list[i](dev, pos + total, - len - total); + len - total, size); if (done < 0) return -EFAULT; total += done; @@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg) /* * All done. Write the updated control block back to the caller. */ - ifc.ifc_len = total; + ifc->ifc_len = total; /* * Both BSD and Solaris return 0 here, so we do too. */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; + return 0; } /* @@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load); * positive or a negative errno code on error. */ -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout) { - struct ifreq ifr; int ret; char *colon; - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } + if (need_copyout) + *need_copyout = true; if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - /* - * Take care of Wireless Extensions. Unfortunately struct iwreq - * isn't a proper subset of struct ifreq (it's 8 byte shorter) - * so we need to treat it specially, otherwise applications may - * fault if the struct they're passing happens to land at the - * end of a mapped page. - */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - struct iwreq iwr; - - if (copy_from_user(&iwr, arg, sizeof(iwr))) - return -EFAULT; - - iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + return dev_ifname(net, ifr); - return wext_handle_ioctl(net, &iwr, cmd, arg); - } - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; + ifr->ifr_name[IFNAMSIZ-1] = 0; - colon = strchr(ifr.ifr_name, ':'); + colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; + dev_load(net, ifr->ifr_name); + /* * See which interface the caller is talking about. 
*/ @@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); rcu_read_lock(); - ret = dev_ifsioc_locked(net, &ifr, cmd); + ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(net, &ifr); + ret = dev_ethtool(net, ifr); rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; /* @@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; /* @@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); + if (need_copyout) + *need_copyout = false; return ret; case SIOCGIFMEM: @@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; return ret; } return -ENOTTY; diff --git a/net/core/devlink.c b/net/core/devlink.c index dd7d6dd07bfb..18d385ed8237 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2294,7 +2294,7 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, counters_enable); } -struct devlink_resource * +static struct devlink_resource * devlink_resource_find(struct devlink *devlink, struct devlink_resource *resource, u64 resource_id) { @@ -2319,7 +2319,8 @@ devlink_resource_find(struct devlink *devlink, return NULL; } -void devlink_resource_validate_children(struct devlink_resource *resource) +static void +devlink_resource_validate_children(struct devlink_resource *resource) { struct devlink_resource *child_resource; bool size_valid = true; @@ -3181,6 +3182,7 @@ int devlink_resource_register(struct devlink *devlink, resource_list = &parent_resource->resource_list; resource->parent = parent_resource; } else { + kfree(resource); err = -EINVAL; goto out; } diff --git a/net/core/filter.c b/net/core/filter.c index db2ee8c7e1bd..18da42a81d0c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -458,6 +458,10 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; + if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || + fp->code == (BPF_ALU | BPF_MOD | BPF_X)) + *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; @@ -2861,7 +2865,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - 
.arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static unsigned short bpf_tunnel_key_af(u64 flags) @@ -3150,7 +3154,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) @@ -3456,6 +3460,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_xdp_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_csum_diff: + return &bpf_csum_diff_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; case BPF_FUNC_xdp_adjust_meta: @@ -4526,6 +4532,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = { }; const struct bpf_prog_ops sk_filter_prog_ops = { + .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops tc_cls_act_verifier_ops = { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 02db7b122a73..559db9ea8d86 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1031,8 +1031,8 @@ ip_proto_again: out_good: ret = true; - key_control->thoff = (u16)nhoff; out: + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; @@ -1040,7 +1040,6 @@ out: out_bad: ret = false; - key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); goto out; } EXPORT_SYMBOL(__skb_flow_dissect); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 982861607f88..e38e641e98d5 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev_get_iflink(dev)) return true; - if (dev->priv_flags & IFF_TEAM_PORT) + if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7bf8b85ade16..c4a28f4667b6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -295,10 +295,31 @@ static ssize_t carrier_changes_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); return sprintf(buf, fmt_dec, - atomic_read(&netdev->carrier_changes)); + atomic_read(&netdev->carrier_up_count) + + atomic_read(&netdev->carrier_down_count)); } static DEVICE_ATTR_RO(carrier_changes); +static ssize_t carrier_up_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count)); +} +static DEVICE_ATTR_RO(carrier_up_count); + +static ssize_t carrier_down_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count)); +} +static DEVICE_ATTR_RO(carrier_down_count); + /* read-write attributes */ static int change_mtu(struct net_device *dev, unsigned long new_mtu) @@ -547,6 +568,8 @@ static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, &dev_attr_proto_down.attr, + &dev_attr_carrier_up_count.attr, + &dev_attr_carrier_down_count.attr, NULL, }; ATTRIBUTE_GROUPS(net_class); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1ccb953b3b09..3cad5f51afd3 100644 --- a/net/core/net_namespace.c +++ 
b/net/core/net_namespace.c @@ -439,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags, return net; } +static void unhash_nsid(struct net *net, struct net *last) +{ + struct net *tmp; + /* This function is only called from cleanup_net() work, + * and this work is the only process that may delete + * a net from net_namespace_list. So, when the below + * is executing, the list may only grow. Thus, we do not + * use for_each_net_rcu() or rtnl_lock(). + */ + for_each_net(tmp) { + int id; + + spin_lock_bh(&tmp->nsid_lock); + id = __peernet2id(tmp, net); + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + spin_unlock_bh(&tmp->nsid_lock); + if (id >= 0) + rtnl_net_notifyid(tmp, RTM_DELNSID, id); + if (tmp == last) + break; + } + spin_lock_bh(&net->nsid_lock); + idr_destroy(&net->netns_ids); + spin_unlock_bh(&net->nsid_lock); +} + static DEFINE_SPINLOCK(cleanup_list_lock); static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; - struct net *net, *tmp; + struct net *net, *tmp, *last; struct list_head net_kill_list; LIST_HEAD(net_exit_list); @@ -458,26 +485,25 @@ static void cleanup_net(struct work_struct *work) /* Don't let anyone else find us. */ rtnl_lock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_for_each_entry(net, &net_kill_list, cleanup_list) list_del_rcu(&net->list); - list_add_tail(&net->exit_list, &net_exit_list); - for_each_net(tmp) { - int id; - - spin_lock_bh(&tmp->nsid_lock); - id = __peernet2id(tmp, net); - if (id >= 0) - idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); - if (id >= 0) - rtnl_net_notifyid(tmp, RTM_DELNSID, id); - } - spin_lock_bh(&net->nsid_lock); - idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + /* Cache last net. After we unlock rtnl, no new net + * added to net_namespace_list can assign an nsid pointer + * to a net from net_kill_list (see peernet2id_alloc()). + * So, we skip them in unhash_nsid(). + * + * Note that unhash_nsid() does not delete nsid links + * between net_kill_list's nets, as they've already been + * deleted from net_namespace_list. But this would be + * useless anyway, as netns_ids are destroyed there. + */ + last = list_last_entry(&net_namespace_list, struct net, list); + rtnl_unlock(); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + unhash_nsid(net, last); + list_add_tail(&net->exit_list, &net_exit_list); } - rtnl_unlock(); /* * Another CPU might be rcu-iterating the list, wait for it.
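The pktgen rework below collapses eighteen hand-written F_* bit defines, their per-flag strcmp() parsing chains, and their per-flag seq_puts() printing into a single X-macro table that generates the shift enum, the bit masks, and the flag-name array in lockstep. A minimal, self-contained userspace sketch of that pattern follows (flag names are illustrative only, and plain #flag stringification stands in for the kernel's __stringify()):

#include <stdio.h>

#define DEMO_FLAGS \
	pf(FOO)	/* hypothetical flag */ \
	pf(BAR)	/* hypothetical flag */ \
	pf(BAZ)	/* hypothetical flag */

/* Expand the table three ways: bit positions, masks, printable names. */
#define pf(flag) flag##_SHIFT,
enum { DEMO_FLAGS NR_DEMO_FLAGS };
#undef pf

#define pf(flag) static const unsigned int F_##flag = 1u << flag##_SHIFT;
DEMO_FLAGS
#undef pf

#define pf(flag) #flag,
static const char *demo_flag_names[] = { DEMO_FLAGS };
#undef pf

int main(void)
{
	unsigned int flags = F_FOO | F_BAZ;
	unsigned int i;

	/* Printing becomes one loop instead of one branch per flag. */
	for (i = 0; i < NR_DEMO_FLAGS; i++)
		if (flags & (1u << i))
			printf("%s ", demo_flag_names[i]);
	printf("\n");
	return 0;
}

Adding a flag then means adding one pf() line to the table; the lookup side (cf. pktgen_read_flag() in the diff below) and the printing side stay unchanged.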
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4fcfcb14e7c6..b8ab5c829511 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -184,25 +184,44 @@ #define func_enter() pr_debug("entering %s\n", __func__); +#define PKT_FLAGS \ + pf(IPV6) /* Interface in IPV6 Mode */ \ + pf(IPSRC_RND) /* IP-Src Random */ \ + pf(IPDST_RND) /* IP-Dst Random */ \ + pf(TXSIZE_RND) /* Transmit size is random */ \ + pf(UDPSRC_RND) /* UDP-Src Random */ \ + pf(UDPDST_RND) /* UDP-Dst Random */ \ + pf(UDPCSUM) /* Include UDP checksum */ \ + pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \ + pf(MPLS_RND) /* Random MPLS labels */ \ + pf(QUEUE_MAP_RND) /* queue map Random */ \ + pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \ + pf(FLOW_SEQ) /* Sequential flows */ \ + pf(IPSEC) /* ipsec on for flows */ \ + pf(MACSRC_RND) /* MAC-Src Random */ \ + pf(MACDST_RND) /* MAC-Dst Random */ \ + pf(VID_RND) /* Random VLAN ID */ \ + pf(SVID_RND) /* Random SVLAN ID */ \ + pf(NODE) /* Node memory alloc*/ \ + +#define pf(flag) flag##_SHIFT, +enum pkt_flags { + PKT_FLAGS +}; +#undef pf + /* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ -#define F_VID_RND (1<<9) /* Random VLAN ID */ -#define F_SVID_RND (1<<10) /* Random SVLAN ID */ -#define F_FLOW_SEQ (1<<11) /* Sequential flows */ -#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ -#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ -#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ -#define F_NODE (1<<15) /* Node memory alloc*/ -#define F_UDPCSUM (1<<16) /* Include UDP checksum */ -#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */ +#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT); +PKT_FLAGS +#undef pf + +#define pf(flag) __stringify(flag), +static char *pkt_flag_names[] = { + PKT_FLAGS +}; +#undef pf + +#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names) /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -534,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) { const struct pktgen_dev *pkt_dev = seq->private; ktime_t stopped; + unsigned int i; u64 idle; seq_printf(seq, @@ -595,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->nr_labels) { - unsigned int i; seq_puts(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), @@ -631,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " Flags: "); - if (pkt_dev->flags & F_IPV6) - seq_puts(seq, "IPV6 "); - - if (pkt_dev->flags & F_IPSRC_RND) - seq_puts(seq, "IPSRC_RND "); - - if (pkt_dev->flags & F_IPDST_RND) - seq_puts(seq, "IPDST_RND "); - - if (pkt_dev->flags & F_TXSIZE_RND) - seq_puts(seq, "TXSIZE_RND "); - - if (pkt_dev->flags & F_UDPSRC_RND) - seq_puts(seq, "UDPSRC_RND "); - - if (pkt_dev->flags & F_UDPDST_RND) - seq_puts(seq, "UDPDST_RND "); - - if (pkt_dev->flags & F_UDPCSUM) - seq_puts(seq, "UDPCSUM "); - - if (pkt_dev->flags & F_NO_TIMESTAMP) - seq_puts(seq, "NO_TIMESTAMP "); - - if (pkt_dev->flags 
& F_MPLS_RND) - seq_puts(seq, "MPLS_RND "); - - if (pkt_dev->flags & F_QUEUE_MAP_RND) - seq_puts(seq, "QUEUE_MAP_RND "); + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (i == F_FLOW_SEQ) + if (!pkt_dev->cflows) + continue; - if (pkt_dev->flags & F_QUEUE_MAP_CPU) - seq_puts(seq, "QUEUE_MAP_CPU "); - - if (pkt_dev->cflows) { - if (pkt_dev->flags & F_FLOW_SEQ) - seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/ - else - seq_puts(seq, "FLOW_RND "); - } + if (pkt_dev->flags & (1 << i)) + seq_printf(seq, "%s ", pkt_flag_names[i]); + else if (i == F_FLOW_SEQ) + seq_puts(seq, "FLOW_RND "); #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) { - seq_puts(seq, "IPSEC "); - if (pkt_dev->spi) + if (i == F_IPSEC && pkt_dev->spi) seq_printf(seq, "spi:%u", pkt_dev->spi); - } #endif - - if (pkt_dev->flags & F_MACSRC_RND) - seq_puts(seq, "MACSRC_RND "); - - if (pkt_dev->flags & F_MACDST_RND) - seq_puts(seq, "MACDST_RND "); - - if (pkt_dev->flags & F_VID_RND) - seq_puts(seq, "VID_RND "); - - if (pkt_dev->flags & F_SVID_RND) - seq_puts(seq, "SVID_RND "); - - if (pkt_dev->flags & F_NODE) - seq_puts(seq, "NODE_ALLOC "); + } seq_puts(seq, "\n"); @@ -858,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) return i; } +static __u32 pktgen_read_flag(const char *f, bool *disable) +{ + __u32 i; + + if (f[0] == '!') { + *disable = true; + f++; + } + + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT) + continue; + + /* allow only disabling ipv6 flag */ + if (!*disable && i == IPV6_SHIFT) + continue; + + if (strcmp(f, pkt_flag_names[i]) == 0) + return 1 << i; + } + + if (strcmp(f, "FLOW_RND") == 0) { + *disable = !*disable; + return F_FLOW_SEQ; + } + + return 0; +} + static ssize_t pktgen_if_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) @@ -1215,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "flag")) { + __u32 flag; char f[32]; + bool disable = false; + memset(f, 0, 32); len = strn_len(&user_buffer[i], sizeof(f) - 1); if (len < 0) @@ -1224,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; i += len; - if (strcmp(f, "IPSRC_RND") == 0) - pkt_dev->flags |= F_IPSRC_RND; - - else if (strcmp(f, "!IPSRC_RND") == 0) - pkt_dev->flags &= ~F_IPSRC_RND; - - else if (strcmp(f, "TXSIZE_RND") == 0) - pkt_dev->flags |= F_TXSIZE_RND; - - else if (strcmp(f, "!TXSIZE_RND") == 0) - pkt_dev->flags &= ~F_TXSIZE_RND; - - else if (strcmp(f, "IPDST_RND") == 0) - pkt_dev->flags |= F_IPDST_RND; - - else if (strcmp(f, "!IPDST_RND") == 0) - pkt_dev->flags &= ~F_IPDST_RND; - - else if (strcmp(f, "UDPSRC_RND") == 0) - pkt_dev->flags |= F_UDPSRC_RND; - - else if (strcmp(f, "!UDPSRC_RND") == 0) - pkt_dev->flags &= ~F_UDPSRC_RND; - - else if (strcmp(f, "UDPDST_RND") == 0) - pkt_dev->flags |= F_UDPDST_RND; - - else if (strcmp(f, "!UDPDST_RND") == 0) - pkt_dev->flags &= ~F_UDPDST_RND; - - else if (strcmp(f, "MACSRC_RND") == 0) - pkt_dev->flags |= F_MACSRC_RND; - - else if (strcmp(f, "!MACSRC_RND") == 0) - pkt_dev->flags &= ~F_MACSRC_RND; - - else if (strcmp(f, "MACDST_RND") == 0) - pkt_dev->flags |= F_MACDST_RND; - - else if (strcmp(f, "!MACDST_RND") == 0) - pkt_dev->flags &= ~F_MACDST_RND; - - else if (strcmp(f, "MPLS_RND") == 0) - pkt_dev->flags |= F_MPLS_RND; - - else if (strcmp(f, "!MPLS_RND") == 0) - pkt_dev->flags &= ~F_MPLS_RND; - else if (strcmp(f, "VID_RND") == 0) - pkt_dev->flags |= F_VID_RND; + 
flag = pktgen_read_flag(f, &disable); - else if (strcmp(f, "!VID_RND") == 0) - pkt_dev->flags &= ~F_VID_RND; - - else if (strcmp(f, "SVID_RND") == 0) - pkt_dev->flags |= F_SVID_RND; - - else if (strcmp(f, "!SVID_RND") == 0) - pkt_dev->flags &= ~F_SVID_RND; - - else if (strcmp(f, "FLOW_SEQ") == 0) - pkt_dev->flags |= F_FLOW_SEQ; - - else if (strcmp(f, "QUEUE_MAP_RND") == 0) - pkt_dev->flags |= F_QUEUE_MAP_RND; - - else if (strcmp(f, "!QUEUE_MAP_RND") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_RND; - - else if (strcmp(f, "QUEUE_MAP_CPU") == 0) - pkt_dev->flags |= F_QUEUE_MAP_CPU; - - else if (strcmp(f, "!QUEUE_MAP_CPU") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; -#ifdef CONFIG_XFRM - else if (strcmp(f, "IPSEC") == 0) - pkt_dev->flags |= F_IPSEC_ON; -#endif - - else if (strcmp(f, "!IPV6") == 0) - pkt_dev->flags &= ~F_IPV6; - - else if (strcmp(f, "NODE_ALLOC") == 0) - pkt_dev->flags |= F_NODE; - - else if (strcmp(f, "!NODE_ALLOC") == 0) - pkt_dev->flags &= ~F_NODE; - - else if (strcmp(f, "UDPCSUM") == 0) - pkt_dev->flags |= F_UDPCSUM; - - else if (strcmp(f, "!UDPCSUM") == 0) - pkt_dev->flags &= ~F_UDPCSUM; - - else if (strcmp(f, "NO_TIMESTAMP") == 0) - pkt_dev->flags |= F_NO_TIMESTAMP; - - else if (strcmp(f, "!NO_TIMESTAMP") == 0) - pkt_dev->flags &= ~F_NO_TIMESTAMP; - - else { + if (flag) { + if (disable) + pkt_dev->flags &= ~flag; + else + pkt_dev->flags |= flag; + } else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, @@ -2541,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) + if (pkt_dev->flags & F_IPSEC) get_ipsec_sa(pkt_dev, flow); #endif pkt_dev->nflows++; @@ -2646,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev) static int process_ipsec(struct pktgen_dev *pkt_dev, struct sk_buff *skb, __be16 protocol) { - if (pkt_dev->flags & F_IPSEC_ON) { + if (pkt_dev->flags & F_IPSEC) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int nhead = 0; if (x) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 16d644a4f974..97874daa1336 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -990,6 +990,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(1) /* IFLA_PROTO_DOWN */ + nla_total_size(4) /* IFLA_IF_NETNSID */ + + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */ + + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ + 0; } @@ -1551,8 +1553,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, - atomic_read(&dev->carrier_changes)) || - nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) + atomic_read(&dev->carrier_up_count) + + atomic_read(&dev->carrier_down_count)) || + nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) || + nla_put_u32(skb, IFLA_CARRIER_UP_COUNT, + atomic_read(&dev->carrier_up_count)) || + nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT, + atomic_read(&dev->carrier_down_count))) goto nla_put_failure; if (event != IFLA_EVENT_NONE) { @@ -1656,6 +1663,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_EVENT] = { .type = NLA_U32 }, [IFLA_GROUP] = { .type = NLA_U32 }, [IFLA_IF_NETNSID] = { .type = NLA_S32 }, + [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 }, + [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 }, }; static const struct nla_policy 
ifla_info_policy[IFLA_INFO_MAX+1] = { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a47ad6cd41c0..f2d0462611c3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,6 +25,7 @@ static int zero = 0; static int one = 1; +static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; @@ -250,6 +251,46 @@ static int proc_do_rss_key(struct ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +#ifdef CONFIG_BPF_JIT +static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, jit_enable = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &jit_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (jit_enable < 2 || + (jit_enable == 2 && bpf_dump_raw_ok())) { + *(int *)table->data = jit_enable; + if (jit_enable == 2) + pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); + } else { + ret = -EPERM; + } + } + return ret; +} + +# ifdef CONFIG_HAVE_EBPF_JIT +static int +proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +# endif +#endif + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -325,13 +366,14 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - .proc_handler = proc_dointvec -#else - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_bpf_enable, +# ifdef CONFIG_BPF_JIT_ALWAYS_ON .extra1 = &one, .extra2 = &one, -#endif +# else + .extra1 = &zero, + .extra2 = &two, +# endif }, # ifdef CONFIG_HAVE_EBPF_JIT { @@ -339,14 +381,18 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_harden, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &zero, + .extra2 = &two, }, { .procname = "bpf_jit_kallsyms", .data = &bpf_jit_kallsyms, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &zero, + .extra2 = &one, }, # endif #endif diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 21f9bed11988..adf50fbc4c13 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: case DSA_PORT_TYPE_DSA: - err = dsa_port_fixed_link_register_of(dp); + err = dsa_port_link_register_of(dp); if (err) { - dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", + dev_err(ds->dev, "failed to setup link for port %d.%d\n", ds->index, dp->index); return err; } - break; case DSA_PORT_TYPE_USER: err = dsa_slave_create(dp); @@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: case DSA_PORT_TYPE_DSA: - dsa_port_fixed_link_unregister_of(dp); + dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: if (dp->slave) { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index cefb0c3c6d51..70de7895e5b8 100644 --- a/net/dsa/dsa_priv.h +++ 
b/net/dsa/dsa_priv.h @@ -166,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); -int dsa_port_fixed_link_register_of(struct dsa_port *dp); -void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); +int dsa_port_link_register_of(struct dsa_port *dp); +void dsa_port_link_unregister_of(struct dsa_port *dp); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index aa56d3fb5da4..cb54b81d0bd9 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds) if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - ret = dsa_port_fixed_link_register_of(&ds->ports[port]); + ret = dsa_port_link_register_of(&ds->ports[port]); if (ret) return ret; } @@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dsa_port_fixed_link_unregister_of(&ds->ports[port]); + dsa_port_link_unregister_of(&ds->ports[port]); } if (ds->slave_mii_bus && ds->ops->phy_read) diff --git a/net/dsa/port.c b/net/dsa/port.c index bb4be2679904..7acc1169d75e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp, return 0; } -int dsa_port_fixed_link_register_of(struct dsa_port *dp) +static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) +{ + struct device_node *port_dn = dp->dn; + struct device_node *phy_dn; + struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int err = 0; + + phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); + if (!phy_dn) + return 0; + + phydev = of_phy_find_device(phy_dn); + if (!phydev) { + err = -EPROBE_DEFER; + goto err_put_of; + } + + if (enable) { + err = genphy_config_init(phydev); + if (err < 0) + goto err_put_dev; + + err = genphy_resume(phydev); + if (err < 0) + goto err_put_dev; + + err = genphy_read_status(phydev); + if (err < 0) + goto err_put_dev; + } else { + err = genphy_suspend(phydev); + if (err < 0) + goto err_put_dev; + } + + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); + + dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev)); + +err_put_dev: + put_device(&phydev->mdio.dev); +err_put_of: + of_node_put(phy_dn); + return err; +} + +static int dsa_port_fixed_link_register_of(struct dsa_port *dp) { struct device_node *dn = dp->dn; struct dsa_switch *ds = dp->ds; @@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp) int mode; int err; - if (of_phy_is_fixed_link(dn)) { - err = of_phy_register_fixed_link(dn); - if (err) { - dev_err(ds->dev, - "failed to register the fixed PHY of port %d\n", - port); - return err; - } + err = of_phy_register_fixed_link(dn); + if (err) { + dev_err(ds->dev, + "failed to register the fixed PHY of port %d\n", + port); + return err; + } - phydev = of_phy_find_device(dn); + phydev = of_phy_find_device(dn); - mode = of_get_phy_mode(dn); - if (mode < 0) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; + mode = of_get_phy_mode(dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + phydev->interface = mode; - genphy_config_init(phydev); - genphy_read_status(phydev); + genphy_config_init(phydev); + genphy_read_status(phydev); - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); + if 
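
The DSA hunks in this area fold fixed-link and real-PHY handling behind a single pair of entry points, dsa_port_link_register_of()/dsa_port_link_unregister_of(), so dsa2.c and legacy.c no longer care which link type the device tree describes. A toy sketch of that dispatch shape, with invented types standing in for the OF/phylib calls:

    #include <stdbool.h>
    #include <stdio.h>

    struct port {
            const char *name;
            bool fixed_link;        /* would come from the device tree */
    };

    static int fixed_link_register(struct port *p)
    {
            printf("%s: fixed link registered\n", p->name);
            return 0;
    }

    static int phy_setup(struct port *p, bool enable)
    {
            printf("%s: PHY %s\n", p->name, enable ? "resumed" : "suspended");
            return 0;
    }

    /* single entry point: callers no longer branch on the link type */
    static int port_link_register(struct port *p)
    {
            if (p->fixed_link)
                    return fixed_link_register(p);
            return phy_setup(p, true);
    }

    int main(void)
    {
            struct port a = { "sw0p0", true }, b = { "sw0p1", false };

            return port_link_register(&a) | port_link_register(&b);
    }
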
(ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); - put_device(&phydev->mdio.dev); - } + put_device(&phydev->mdio.dev); return 0; } -void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) +int dsa_port_link_register_of(struct dsa_port *dp) { - struct device_node *dn = dp->dn; + if (of_phy_is_fixed_link(dp->dn)) + return dsa_port_fixed_link_register_of(dp); + else + return dsa_port_setup_phy_of(dp, true); +} - if (of_phy_is_fixed_link(dn)) - of_phy_deregister_fixed_link(dn); +void dsa_port_link_unregister_of(struct dsa_port *dp) +{ + if (of_phy_is_fixed_link(dp->dn)) + of_phy_deregister_fixed_link(dp->dn); + else + dsa_port_setup_phy_of(dp, false); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 54cccdd8b1e3..c24008daa3d8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -872,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; int err = 0; struct net *net = sock_net(sk); + void __user *p = (void __user *)arg; + struct ifreq ifr; + struct rtentry rt; switch (cmd) { case SIOCGSTAMP: @@ -882,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; case SIOCADDRT: case SIOCDELRT: + if (copy_from_user(&rt, p, sizeof(struct rtentry))) + return -EFAULT; + err = ip_rt_ioctl(net, cmd, &rt); + break; case SIOCRTMSG: - err = ip_rt_ioctl(net, cmd, (void __user *)arg); + err = -EINVAL; break; case SIOCDARP: case SIOCGARP: @@ -891,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: - case SIOCSIFADDR: case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: case SIOCGIFNETMASK: - case SIOCSIFNETMASK: case SIOCGIFDSTADDR: + case SIOCGIFPFLAGS: + if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + return -EFAULT; + err = devinet_ioctl(net, cmd, &ifr); + if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq))) + err = -EFAULT; + break; + + case SIOCSIFADDR: + case SIOCSIFBRDADDR: + case SIOCSIFNETMASK: case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: - case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(net, cmd, (void __user *)arg); + if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + return -EFAULT; + err = devinet_ioctl(net, cmd, &ifr); break; default: if (sk->sk_prot->ioctl) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7a93359fbc72..e056c0067f2c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr) } -int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { - struct ifreq ifr; struct sockaddr_in sin_orig; - struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; struct in_device *in_dev; struct in_ifaddr **ifap = NULL; struct in_ifaddr *ifa = NULL; @@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) int ret = -EFAULT; int tryaddrmatch = 0; - /* - * Fetch the caller's info block into kernel space - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - goto out; - ifr.ifr_name[IFNAMSIZ - 1] = 0; + ifr->ifr_name[IFNAMSIZ - 1] = 0; /* save original address for comparison */ memcpy(&sin_orig, sin, sizeof(*sin)); - colon = strchr(ifr.ifr_name, ':'); + colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; - dev_load(net, ifr.ifr_name); + dev_load(net, ifr->ifr_name); switch (cmd) { case 
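
With inet_ioctl() above copying the ifreq in and out itself, devinet_ioctl() now operates on a kernel-space structure. The userspace contract is unchanged; the classic pattern still applies, as in this complete, compilable example fetching the address of lo:

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <unistd.h>

    int main(void)
    {
            struct ifreq ifr;
            struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0) {
                    perror("socket");
                    return 1;
            }
            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1);
            if (ioctl(fd, SIOCGIFADDR, &ifr) == 0)  /* kernel fills ifr back in */
                    printf("lo: %s\n", inet_ntoa(sin->sin_addr));
            else
                    perror("SIOCGIFADDR");
            close(fd);
            return 0;
    }
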
SIOCGIFADDR: /* Get interface address */ @@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - dev = __dev_get_by_name(net, ifr.ifr_name); + dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; @@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) This is checked above. */ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_local) { break; /* found */ @@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) - if (!strcmp(ifr.ifr_name, ifa->ifa_label)) + if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; } } @@ -1056,19 +1049,19 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCGIFADDR: /* Get interface address */ sin->sin_addr.s_addr = ifa->ifa_local; - goto rarok; + break; case SIOCGIFBRDADDR: /* Get the broadcast address */ sin->sin_addr.s_addr = ifa->ifa_broadcast; - goto rarok; + break; case SIOCGIFDSTADDR: /* Get the destination address */ sin->sin_addr.s_addr = ifa->ifa_address; - goto rarok; + break; case SIOCGIFNETMASK: /* Get the netmask for the interface */ sin->sin_addr.s_addr = ifa->ifa_mask; - goto rarok; + break; case SIOCSIFFLAGS: if (colon) { @@ -1076,11 +1069,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) break; ret = 0; - if (!(ifr.ifr_flags & IFF_UP)) + if (!(ifr->ifr_flags & IFF_UP)) inet_del_ifa(in_dev, ifap, 1); break; } - ret = dev_change_flags(dev, ifr.ifr_flags); + ret = dev_change_flags(dev, ifr->ifr_flags); break; case SIOCSIFADDR: /* Set interface address (and family) */ @@ -1095,7 +1088,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; INIT_HLIST_NODE(&ifa->hash); if (colon) - memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); + memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); } else { @@ -1182,28 +1175,27 @@ done: rtnl_unlock(); out: return ret; -rarok: - rtnl_unlock(); - ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? 
-EFAULT : 0; - goto out; } -static int inet_gifconf(struct net_device *dev, char __user *buf, int len) +static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; + if (WARN_ON(size > sizeof(struct ifreq))) + goto out; + if (!in_dev) goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { if (!buf) { - done += sizeof(ifr); + done += size; continue; } - if (len < (int) sizeof(ifr)) + if (len < size) break; memset(&ifr, 0, sizeof(struct ifreq)); strcpy(ifr.ifr_name, ifa->ifa_label); @@ -1212,13 +1204,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; - if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { + if (copy_to_user(buf + done, &ifr, size)) { done = -EFAULT; break; } - buf += sizeof(struct ifreq); - len -= sizeof(struct ifreq); - done += sizeof(struct ifreq); + len -= size; + done += size; } out: return done; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 32fbd9ba3609..da5635fc52c2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -118,6 +118,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!xo) return ERR_PTR(-EINVAL); + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + return ERR_PTR(-EINVAL); + x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 08259d078b1c..f05afaf3235c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, * Handle IP routing ioctl calls. 
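
inet_gifconf() above gains a size argument — presumably so a compat caller can pass the 32-bit ifreq size, an assumption since the caller is not shown here — and indexes the output buffer as buf + done instead of advancing the pointer. The userspace contract is unchanged: probe with a NULL buffer to learn the needed length, then fill, as in this sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <unistd.h>

    int main(void)
    {
            struct ifconf ifc;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);
            int i, n;

            if (fd < 0) {
                    perror("socket");
                    return 1;
            }
            ifc.ifc_len = 0;
            ifc.ifc_buf = NULL;             /* probe: kernel reports needed length */
            if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) {
                    perror("SIOCGIFCONF (probe)");
                    return 1;
            }
            ifc.ifc_buf = malloc(ifc.ifc_len);
            if (!ifc.ifc_buf)
                    return 1;
            if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) {         /* fill */
                    perror("SIOCGIFCONF (fill)");
                    return 1;
            }
            n = ifc.ifc_len / sizeof(struct ifreq);
            for (i = 0; i < n; i++)
                    printf("%s\n", ifc.ifc_req[i].ifr_name);
            free(ifc.ifc_buf);
            close(fd);
            return 0;
    }
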
* These are used to manipulate the routing tables */ -int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) { struct fib_config cfg; - struct rtentry rt; int err; switch (cmd) { @@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&rt, arg, sizeof(rt))) - return -EFAULT; - rtnl_lock(); - err = rtentry_to_fib_config(net, cmd, &rt, &cfg); + err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 02f00be12bb0..10f7f74a0831 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev, return htonl(INADDR_ANY); for_ifa(in_dev) { - if (inet_ifa_match(fl4->saddr, ifa)) + if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } endfor_ifa(in_dev); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5ddb1cb52bd4..141f5e865731 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -711,9 +711,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + if (tunnel->fwmark) { + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); + } + else { + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + skb->mark); + } if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e9e488e72900..f75802ad960f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) sin->sin_port = port; } -static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); - set_fs(oldfs); - return res; -} - -static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg); - set_fs(oldfs); - return res; -} - -static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg); - set_fs(oldfs); - return res; -} - /* * Set up interface addresses and routes. 
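
ip_rt_ioctl() now receives a struct rtentry that inet_ioctl() has already copied in, which is what lets ipconfig call these helpers directly instead of through the set_fs() wrappers removed above. The same structure is what userspace hands to SIOCADDRT; a minimal sketch adding a default route (needs CAP_NET_ADMIN; the gateway address is illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/route.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <unistd.h>

    static void set_sin(struct sockaddr *sa, in_addr_t addr)
    {
            struct sockaddr_in *sin = (struct sockaddr_in *)sa;

            memset(sin, 0, sizeof(*sin));
            sin->sin_family = AF_INET;
            sin->sin_addr.s_addr = addr;
    }

    int main(void)
    {
            struct rtentry rt;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0) {
                    perror("socket");
                    return 1;
            }
            memset(&rt, 0, sizeof(rt));
            set_sin(&rt.rt_dst, INADDR_ANY);
            set_sin(&rt.rt_genmask, INADDR_ANY);
            set_sin(&rt.rt_gateway, inet_addr("192.0.2.1"));    /* illustrative */
            rt.rt_flags = RTF_UP | RTF_GATEWAY;
            if (ioctl(fd, SIOCADDRT, &rt) < 0)
                    perror("SIOCADDRT");
            close(fd);
            return 0;
    }
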
*/ @@ -375,19 +342,19 @@ static int __init ic_setup_if(void) memset(&ir, 0, sizeof(ir)); strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); - if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", err); return -1; } set_sockaddr(sin, ic_netmask, 0); - if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) { pr_err("IP-Config: Unable to set interface netmask (%d)\n", err); return -1; } set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); - if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface broadcast address (%d)\n", err); return -1; @@ -397,11 +364,11 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->dev->name); - ir.ifr_mtu = ic_dev_mtu; - if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) + rtnl_lock(); + if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", ic_dev_mtu, err); + rtnl_unlock(); } return 0; } @@ -423,7 +390,7 @@ static int __init ic_setup_routes(void) set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); rm.rt_flags = RTF_UP | RTF_GATEWAY; - if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { + if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) { pr_err("IP-Config: Cannot add default route (%d)\n", err); return -1; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 7d5d444964aa..5f52236780b4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -79,8 +79,9 @@ config NF_TABLES_ARP endif # NF_TABLES config NF_FLOW_TABLE_IPV4 - select NF_FLOW_TABLE tristate "Netfilter flow table IPv4 module" + depends on NF_CONNTRACK && NF_TABLES + select NF_FLOW_TABLE help This option adds the flow table IPv4 support. 
@@ -157,6 +158,7 @@ config NF_NAT_SNMP_BASIC depends on NF_CONNTRACK_SNMP depends on NETFILTER_ADVANCED default NF_NAT && NF_CONNTRACK_SNMP + select ASN1 ---help--- This module implements an Application Layer Gateway (ALG) for @@ -342,6 +344,7 @@ config IP_NF_TARGET_CLUSTERIP depends on NF_CONNTRACK_IPV4 depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK + select NETFILTER_FAMILY_ARP help The CLUSTERIP target allows you to build load-balancing clusters of network servers without having a dedicated load-balancing diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 8bb1f0c7a375..2dad20eefd26 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -27,9 +27,15 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o + +nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o +nf_nat_snmp_basic-y : nf_nat_snmp_basic-asn1.h nf_nat_snmp_basic-asn1.c obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o +clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h + obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o + # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index bf8a5340f15e..5f7c0d643fb3 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1656,7 +1656,6 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - pr_info("arp_tables: (C) 2002 David S. Miller\n"); return 0; err4: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0b975aa2d363..1f534aec22f0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1939,7 +1939,6 @@ static int __init ip_tables_init(void) if (ret < 0) goto err5; - pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index a869d1fea7d9..960625aabf04 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -3,6 +3,7 @@ * * Copyright (C) 2003 Jozsef Kadlecsik <[email protected]> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> @@ -12,6 +13,10 @@ static int __net_init iptable_raw_table_init(struct net *net); +static bool raw_before_defrag __read_mostly; +MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); +module_param(raw_before_defrag, bool, 0000); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -21,6 +26,15 @@ static const struct xt_table packet_raw = { .table_init = iptable_raw_table_init, }; +static const struct xt_table packet_raw_before_defrag = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, + .table_init = iptable_raw_table_init, +}; + /* The work comes in here from netfilter.c. 
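
iptable_raw above gains a raw_before_defrag module parameter (modprobe iptable_raw raw_before_defrag=1) that swaps in an alternate table registered ahead of defragmentation. The selection pattern, reduced to standalone C — the priority numbers mirror NF_IP_PRI_RAW / NF_IP_PRI_RAW_BEFORE_DEFRAG but are only illustrative here:

    #include <stdbool.h>
    #include <stdio.h>

    struct table_desc {
            const char *name;
            int priority;
    };

    static const struct table_desc packet_raw = { "raw", -300 };
    static const struct table_desc packet_raw_before_defrag = { "raw", -450 };

    static bool raw_before_defrag;          /* stands in for the module param */

    static const struct table_desc *pick_table(void)
    {
            return raw_before_defrag ? &packet_raw_before_defrag : &packet_raw;
    }

    int main(void)
    {
            raw_before_defrag = true;
            printf("registering %s at priority %d\n",
                   pick_table()->name, pick_table()->priority);
            return 0;
    }

The two xt_table definitions differ only in priority, so one boolean at init time is all the dispatch needed.
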
*/ static unsigned int iptable_raw_hook(void *priv, struct sk_buff *skb, @@ -34,15 +48,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_table_init(struct net *net) { struct ipt_replace *repl; + const struct xt_table *table = &packet_raw; int ret; + if (raw_before_defrag) + table = &packet_raw_before_defrag; + if (net->ipv4.iptable_raw) return 0; - repl = ipt_alloc_initial_table(&packet_raw); + repl = ipt_alloc_initial_table(table); if (repl == NULL) return -ENOMEM; - ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops, + ret = ipt_register_table(net, table, repl, rawtable_ops, &net->ipv4.iptable_raw); kfree(repl); return ret; @@ -63,8 +81,15 @@ static struct pernet_operations iptable_raw_net_ops = { static int __init iptable_raw_init(void) { int ret; + const struct xt_table *table = &packet_raw; + + if (raw_before_defrag) { + table = &packet_raw_before_defrag; + + pr_info("Enabling raw table before defrag\n"); + } - rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 37fe1616ca0b..a0d3ad60a411 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -78,6 +78,8 @@ static unsigned int ipv4_conntrack_defrag(void *priv, if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; #endif + if (skb->_nfct == IP_CT_UNTRACKED) + return NF_ACCEPT; #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 new file mode 100644 index 000000000000..24b73268f362 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 @@ -0,0 +1,177 @@ +Message ::= + SEQUENCE { + version + INTEGER ({snmp_version}), + + community + OCTET STRING, + + pdu + PDUs + } + + +ObjectName ::= + OBJECT IDENTIFIER + +ObjectSyntax ::= + CHOICE { + simple + SimpleSyntax, + + application-wide + ApplicationSyntax + } + +SimpleSyntax ::= + CHOICE { + integer-value + INTEGER, + + string-value + OCTET STRING, + + objectID-value + OBJECT IDENTIFIER + } + +ApplicationSyntax ::= + CHOICE { + ipAddress-value + IpAddress, + + counter-value + Counter32, + + timeticks-value + TimeTicks, + + arbitrary-value + Opaque, + + big-counter-value + Counter64, + + unsigned-integer-value + Unsigned32 + } + +IpAddress ::= + [APPLICATION 0] + IMPLICIT OCTET STRING OPTIONAL ({snmp_helper}) + +Counter32 ::= + [APPLICATION 1] + IMPLICIT INTEGER OPTIONAL + +Unsigned32 ::= + [APPLICATION 2] + IMPLICIT INTEGER OPTIONAL + +Gauge32 ::= Unsigned32 OPTIONAL + +TimeTicks ::= + [APPLICATION 3] + IMPLICIT INTEGER OPTIONAL + +Opaque ::= + [APPLICATION 4] + IMPLICIT OCTET STRING OPTIONAL + +Counter64 ::= + [APPLICATION 6] + IMPLICIT INTEGER OPTIONAL + +PDUs ::= + CHOICE { + get-request + GetRequest-PDU, + + get-next-request + GetNextRequest-PDU, + + get-bulk-request + GetBulkRequest-PDU, + + response + Response-PDU, + + set-request + SetRequest-PDU, + + inform-request + InformRequest-PDU, + + snmpV2-trap + SNMPv2-Trap-PDU, + + report + Report-PDU + } + +GetRequest-PDU ::= + [0] IMPLICIT PDU OPTIONAL + +GetNextRequest-PDU ::= + [1] IMPLICIT PDU OPTIONAL + +Response-PDU ::= + [2] IMPLICIT PDU OPTIONAL + +SetRequest-PDU ::= + [3] IMPLICIT PDU OPTIONAL + +-- [4] is obsolete + +GetBulkRequest-PDU ::= + [5] IMPLICIT PDU 
OPTIONAL + +InformRequest-PDU ::= + [6] IMPLICIT PDU OPTIONAL + +SNMPv2-Trap-PDU ::= + [7] IMPLICIT PDU OPTIONAL + +Report-PDU ::= + [8] IMPLICIT PDU OPTIONAL + +PDU ::= + SEQUENCE { + request-id + INTEGER, + + error-status + INTEGER, + + error-index + INTEGER, + + variable-bindings + VarBindList + } + + +VarBind ::= + SEQUENCE { + name + ObjectName, + + CHOICE { + value + ObjectSyntax, + + unSpecified + NULL, + + noSuchObject + [0] IMPLICIT NULL, + + noSuchInstance + [1] IMPLICIT NULL, + + endOfMibView + [2] IMPLICIT NULL + } +} + +VarBindList ::= SEQUENCE OF VarBind diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c deleted file mode 100644 index d5b1e0b3f687..000000000000 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ /dev/null @@ -1,1286 +0,0 @@ -/* - * nf_nat_snmp_basic.c - * - * Basic SNMP Application Layer Gateway - * - * This IP NAT module is intended for use with SNMP network - * discovery and monitoring applications where target networks use - * conflicting private address realms. - * - * Static NAT is used to remap the networks from the view of the network - * management system at the IP layer, and this module remaps some application - * layer addresses to match. - * - * The simplest form of ALG is performed, where only tagged IP addresses - * are modified. The module does not need to be MIB aware and only scans - * messages at the ASN.1/BER level. - * - * Currently, only SNMPv1 and SNMPv2 are supported. - * - * More information on ALG and associated issues can be found in - * RFC 2962 - * - * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory - * McLean & Jochen Friedrich, stripped down for use in the kernel. - * - * Copyright (c) 2000 RP Internet (www.rpi.net.au). - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - * Author: James Morris <[email protected]> - * - * Copyright (c) 2006-2010 Patrick McHardy <[email protected]> - */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <net/checksum.h> -#include <net/udp.h> - -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_conntrack_expect.h> -#include <net/netfilter/nf_conntrack_helper.h> -#include <net/netfilter/nf_nat_helper.h> -#include <linux/netfilter/nf_conntrack_snmp.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("James Morris <[email protected]>"); -MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); -MODULE_ALIAS("ip_nat_snmp_basic"); - -#define SNMP_PORT 161 -#define SNMP_TRAP_PORT 162 -#define NOCT1(n) (*(u8 *)(n)) - -static int debug; -static DEFINE_SPINLOCK(snmp_lock); - -/* - * Application layer address mapping mimics the NAT mapping, but - * only for the first octet in this case (a more flexible system - * can be implemented if needed). 
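
The grammar above is compiled at build time by the kernel's ASN.1 compiler (see the Makefile hunk earlier), replacing the hand-rolled BER parser being deleted here. For orientation while reading that deleted code: BER framing starts with an identifier octet, decoded exactly as asn1_id_decode() does below. A self-contained sketch:

    #include <stdio.h>

    static void ber_id_decode(unsigned char ch, unsigned int *cls,
                              unsigned int *con, unsigned int *tag)
    {
            *cls = (ch & 0xC0) >> 6;  /* 0=universal 1=application 2=context 3=private */
            *con = (ch & 0x20) >> 5;  /* constructed vs. primitive */
            *tag = ch & 0x1F;         /* 0x1F means a long-form tag follows */
    }

    int main(void)
    {
            unsigned int cls, con, tag;

            ber_id_decode(0x30, &cls, &con, &tag);  /* SEQUENCE */
            printf("class=%u constructed=%u tag=%u\n", cls, con, tag);
            return 0;       /* prints class=0 constructed=1 tag=16 */
    }
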
- */ -struct oct1_map -{ - u_int8_t from; - u_int8_t to; -}; - - -/***************************************************************************** - * - * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse) - * - *****************************************************************************/ - -/* Class */ -#define ASN1_UNI 0 /* Universal */ -#define ASN1_APL 1 /* Application */ -#define ASN1_CTX 2 /* Context */ -#define ASN1_PRV 3 /* Private */ - -/* Tag */ -#define ASN1_EOC 0 /* End Of Contents */ -#define ASN1_BOL 1 /* Boolean */ -#define ASN1_INT 2 /* Integer */ -#define ASN1_BTS 3 /* Bit String */ -#define ASN1_OTS 4 /* Octet String */ -#define ASN1_NUL 5 /* Null */ -#define ASN1_OJI 6 /* Object Identifier */ -#define ASN1_OJD 7 /* Object Description */ -#define ASN1_EXT 8 /* External */ -#define ASN1_SEQ 16 /* Sequence */ -#define ASN1_SET 17 /* Set */ -#define ASN1_NUMSTR 18 /* Numerical String */ -#define ASN1_PRNSTR 19 /* Printable String */ -#define ASN1_TEXSTR 20 /* Teletext String */ -#define ASN1_VIDSTR 21 /* Video String */ -#define ASN1_IA5STR 22 /* IA5 String */ -#define ASN1_UNITIM 23 /* Universal Time */ -#define ASN1_GENTIM 24 /* General Time */ -#define ASN1_GRASTR 25 /* Graphical String */ -#define ASN1_VISSTR 26 /* Visible String */ -#define ASN1_GENSTR 27 /* General String */ - -/* Primitive / Constructed methods*/ -#define ASN1_PRI 0 /* Primitive */ -#define ASN1_CON 1 /* Constructed */ - -/* - * Error codes. - */ -#define ASN1_ERR_NOERROR 0 -#define ASN1_ERR_DEC_EMPTY 2 -#define ASN1_ERR_DEC_EOC_MISMATCH 3 -#define ASN1_ERR_DEC_LENGTH_MISMATCH 4 -#define ASN1_ERR_DEC_BADVALUE 5 - -/* - * ASN.1 context. - */ -struct asn1_ctx -{ - int error; /* Error condition */ - unsigned char *pointer; /* Octet just to be decoded */ - unsigned char *begin; /* First octet */ - unsigned char *end; /* Octet after last octet */ -}; - -/* - * Octet string (not null terminated) - */ -struct asn1_octstr -{ - unsigned char *data; - unsigned int len; -}; - -static void asn1_open(struct asn1_ctx *ctx, - unsigned char *buf, - unsigned int len) -{ - ctx->begin = buf; - ctx->end = buf + len; - ctx->pointer = buf; - ctx->error = ASN1_ERR_NOERROR; -} - -static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) -{ - if (ctx->pointer >= ctx->end) { - ctx->error = ASN1_ERR_DEC_EMPTY; - return 0; - } - *ch = *(ctx->pointer)++; - return 1; -} - -static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) -{ - unsigned char ch; - - *tag = 0; - - do - { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - *tag <<= 7; - *tag |= ch & 0x7F; - } while ((ch & 0x80) == 0x80); - return 1; -} - -static unsigned char asn1_id_decode(struct asn1_ctx *ctx, - unsigned int *cls, - unsigned int *con, - unsigned int *tag) -{ - unsigned char ch; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *cls = (ch & 0xC0) >> 6; - *con = (ch & 0x20) >> 5; - *tag = (ch & 0x1F); - - if (*tag == 0x1F) { - if (!asn1_tag_decode(ctx, tag)) - return 0; - } - return 1; -} - -static unsigned char asn1_length_decode(struct asn1_ctx *ctx, - unsigned int *def, - unsigned int *len) -{ - unsigned char ch, cnt; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - if (ch == 0x80) - *def = 0; - else { - *def = 1; - - if (ch < 0x80) - *len = ch; - else { - cnt = ch & 0x7F; - *len = 0; - - while (cnt > 0) { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - *len <<= 8; - *len |= ch; - cnt--; - } - } - } - - /* don't trust len bigger than ctx buffer */ - if (*len > ctx->end - ctx->pointer) - return 0; - - 
return 1; -} - -static unsigned char asn1_header_decode(struct asn1_ctx *ctx, - unsigned char **eoc, - unsigned int *cls, - unsigned int *con, - unsigned int *tag) -{ - unsigned int def, len; - - if (!asn1_id_decode(ctx, cls, con, tag)) - return 0; - - def = len = 0; - if (!asn1_length_decode(ctx, &def, &len)) - return 0; - - /* primitive shall be definite, indefinite shall be constructed */ - if (*con == ASN1_PRI && !def) - return 0; - - if (def) - *eoc = ctx->pointer + len; - else - *eoc = NULL; - return 1; -} - -static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) -{ - unsigned char ch; - - if (eoc == NULL) { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - if (ch != 0x00) { - ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - if (ch != 0x00) { - ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; - return 0; - } - return 1; - } else { - if (ctx->pointer != eoc) { - ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH; - return 0; - } - return 1; - } -} - -static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc) -{ - ctx->pointer = eoc; - return 1; -} - -static unsigned char asn1_long_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - long *integer) -{ - unsigned char ch; - unsigned int len; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer = (signed char) ch; - len = 1; - - while (ctx->pointer < eoc) { - if (++len > sizeof (long)) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer <<= 8; - *integer |= ch; - } - return 1; -} - -static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned int *integer) -{ - unsigned char ch; - unsigned int len; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer = ch; - if (ch == 0) len = 0; - else len = 1; - - while (ctx->pointer < eoc) { - if (++len > sizeof (unsigned int)) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer <<= 8; - *integer |= ch; - } - return 1; -} - -static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned long *integer) -{ - unsigned char ch; - unsigned int len; - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer = ch; - if (ch == 0) len = 0; - else len = 1; - - while (ctx->pointer < eoc) { - if (++len > sizeof (unsigned long)) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - return 0; - } - - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *integer <<= 8; - *integer |= ch; - } - return 1; -} - -static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned char **octets, - unsigned int *len) -{ - unsigned char *ptr; - - *len = 0; - - *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); - if (*octets == NULL) - return 0; - - ptr = *octets; - while (ctx->pointer < eoc) { - if (!asn1_octet_decode(ctx, ptr++)) { - kfree(*octets); - *octets = NULL; - return 0; - } - (*len)++; - } - return 1; -} - -static unsigned char asn1_subid_decode(struct asn1_ctx *ctx, - unsigned long *subid) -{ - unsigned char ch; - - *subid = 0; - - do { - if (!asn1_octet_decode(ctx, &ch)) - return 0; - - *subid <<= 7; - *subid |= ch & 0x7F; - } while ((ch & 0x80) == 0x80); - return 1; -} - -static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned long **oid, - unsigned int *len) -{ - unsigned long subid; - unsigned long *optr; - size_t size; - - size = eoc - ctx->pointer + 1; - 
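
Both asn1_tag_decode() and asn1_subid_decode() in the deleted parser above use the same base-128 scheme: seven payload bits per octet, high bit set while more octets follow. Standalone, with a bounds check the loop needs against truncated input:

    #include <stdio.h>

    static int decode_base128(const unsigned char *p, const unsigned char *end,
                              unsigned long *val, const unsigned char **next)
    {
            unsigned char ch;

            *val = 0;
            do {
                    if (p >= end)
                            return 0;               /* truncated input */
                    ch = *p++;
                    *val = (*val << 7) | (ch & 0x7F);
            } while (ch & 0x80);
            *next = p;
            return 1;
    }

    int main(void)
    {
            const unsigned char buf[] = { 0x88, 0x37 };     /* (8 << 7) | 0x37 */
            const unsigned char *next;
            unsigned long v;

            if (decode_base128(buf, buf + sizeof(buf), &v, &next))
                    printf("%lu\n", v);             /* prints 1079 */
            return 0;
    }
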
- /* first subid actually encodes first two subids */ - if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) - return 0; - - *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); - if (*oid == NULL) - return 0; - - optr = *oid; - - if (!asn1_subid_decode(ctx, &subid)) { - kfree(*oid); - *oid = NULL; - return 0; - } - - if (subid < 40) { - optr[0] = 0; - optr[1] = subid; - } else if (subid < 80) { - optr[0] = 1; - optr[1] = subid - 40; - } else { - optr[0] = 2; - optr[1] = subid - 80; - } - - *len = 2; - optr += 2; - - while (ctx->pointer < eoc) { - if (++(*len) > size) { - ctx->error = ASN1_ERR_DEC_BADVALUE; - kfree(*oid); - *oid = NULL; - return 0; - } - - if (!asn1_subid_decode(ctx, optr++)) { - kfree(*oid); - *oid = NULL; - return 0; - } - } - return 1; -} - -/***************************************************************************** - * - * SNMP decoding routines (gxsnmp author Dirk Wisse) - * - *****************************************************************************/ - -/* SNMP Versions */ -#define SNMP_V1 0 -#define SNMP_V2C 1 -#define SNMP_V2 2 -#define SNMP_V3 3 - -/* Default Sizes */ -#define SNMP_SIZE_COMM 256 -#define SNMP_SIZE_OBJECTID 128 -#define SNMP_SIZE_BUFCHR 256 -#define SNMP_SIZE_BUFINT 128 -#define SNMP_SIZE_SMALLOBJECTID 16 - -/* Requests */ -#define SNMP_PDU_GET 0 -#define SNMP_PDU_NEXT 1 -#define SNMP_PDU_RESPONSE 2 -#define SNMP_PDU_SET 3 -#define SNMP_PDU_TRAP1 4 -#define SNMP_PDU_BULK 5 -#define SNMP_PDU_INFORM 6 -#define SNMP_PDU_TRAP2 7 - -/* Errors */ -#define SNMP_NOERROR 0 -#define SNMP_TOOBIG 1 -#define SNMP_NOSUCHNAME 2 -#define SNMP_BADVALUE 3 -#define SNMP_READONLY 4 -#define SNMP_GENERROR 5 -#define SNMP_NOACCESS 6 -#define SNMP_WRONGTYPE 7 -#define SNMP_WRONGLENGTH 8 -#define SNMP_WRONGENCODING 9 -#define SNMP_WRONGVALUE 10 -#define SNMP_NOCREATION 11 -#define SNMP_INCONSISTENTVALUE 12 -#define SNMP_RESOURCEUNAVAILABLE 13 -#define SNMP_COMMITFAILED 14 -#define SNMP_UNDOFAILED 15 -#define SNMP_AUTHORIZATIONERROR 16 -#define SNMP_NOTWRITABLE 17 -#define SNMP_INCONSISTENTNAME 18 - -/* General SNMP V1 Traps */ -#define SNMP_TRAP_COLDSTART 0 -#define SNMP_TRAP_WARMSTART 1 -#define SNMP_TRAP_LINKDOWN 2 -#define SNMP_TRAP_LINKUP 3 -#define SNMP_TRAP_AUTFAILURE 4 -#define SNMP_TRAP_EQPNEIGHBORLOSS 5 -#define SNMP_TRAP_ENTSPECIFIC 6 - -/* SNMPv1 Types */ -#define SNMP_NULL 0 -#define SNMP_INTEGER 1 /* l */ -#define SNMP_OCTETSTR 2 /* c */ -#define SNMP_DISPLAYSTR 2 /* c */ -#define SNMP_OBJECTID 3 /* ul */ -#define SNMP_IPADDR 4 /* uc */ -#define SNMP_COUNTER 5 /* ul */ -#define SNMP_GAUGE 6 /* ul */ -#define SNMP_TIMETICKS 7 /* ul */ -#define SNMP_OPAQUE 8 /* c */ - -/* Additional SNMPv2 Types */ -#define SNMP_UINTEGER 5 /* ul */ -#define SNMP_BITSTR 9 /* uc */ -#define SNMP_NSAP 10 /* uc */ -#define SNMP_COUNTER64 11 /* ul */ -#define SNMP_NOSUCHOBJECT 12 -#define SNMP_NOSUCHINSTANCE 13 -#define SNMP_ENDOFMIBVIEW 14 - -union snmp_syntax -{ - unsigned char uc[0]; /* 8 bit unsigned */ - char c[0]; /* 8 bit signed */ - unsigned long ul[0]; /* 32 bit unsigned */ - long l[0]; /* 32 bit signed */ -}; - -struct snmp_object -{ - unsigned long *id; - unsigned int id_len; - unsigned short type; - unsigned int syntax_len; - union snmp_syntax syntax; -}; - -struct snmp_request -{ - unsigned long id; - unsigned int error_status; - unsigned int error_index; -}; - -struct snmp_v1_trap -{ - unsigned long *id; - unsigned int id_len; - unsigned long ip_address; /* pointer */ - unsigned int general; - unsigned int specific; - unsigned long time; -}; - -/* SNMP 
types */ -#define SNMP_IPA 0 -#define SNMP_CNT 1 -#define SNMP_GGE 2 -#define SNMP_TIT 3 -#define SNMP_OPQ 4 -#define SNMP_C64 6 - -/* SNMP errors */ -#define SERR_NSO 0 -#define SERR_NSI 1 -#define SERR_EOM 2 - -static inline void mangle_address(unsigned char *begin, - unsigned char *addr, - const struct oct1_map *map, - __sum16 *check); -struct snmp_cnv -{ - unsigned int class; - unsigned int tag; - int syntax; -}; - -static const struct snmp_cnv snmp_conv[] = { - {ASN1_UNI, ASN1_NUL, SNMP_NULL}, - {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, - {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, - {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR}, - {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID}, - {ASN1_APL, SNMP_IPA, SNMP_IPADDR}, - {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */ - {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */ - {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS}, - {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE}, - - /* SNMPv2 data types and errors */ - {ASN1_UNI, ASN1_BTS, SNMP_BITSTR}, - {ASN1_APL, SNMP_C64, SNMP_COUNTER64}, - {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT}, - {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE}, - {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW}, - {0, 0, -1} -}; - -static unsigned char snmp_tag_cls2syntax(unsigned int tag, - unsigned int cls, - unsigned short *syntax) -{ - const struct snmp_cnv *cnv; - - cnv = snmp_conv; - - while (cnv->syntax != -1) { - if (cnv->tag == tag && cnv->class == cls) { - *syntax = cnv->syntax; - return 1; - } - cnv++; - } - return 0; -} - -static unsigned char snmp_object_decode(struct asn1_ctx *ctx, - struct snmp_object **obj) -{ - unsigned int cls, con, tag, len, idlen; - unsigned short type; - unsigned char *eoc, *end, *p; - unsigned long *lp, *id; - unsigned long ul; - long l; - - *obj = NULL; - id = NULL; - - if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) - return 0; - - if (!asn1_oid_decode(ctx, end, &id, &idlen)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) { - kfree(id); - return 0; - } - - if (con != ASN1_PRI) { - kfree(id); - return 0; - } - - type = 0; - if (!snmp_tag_cls2syntax(tag, cls, &type)) { - kfree(id); - return 0; - } - - l = 0; - switch (type) { - case SNMP_INTEGER: - len = sizeof(long); - if (!asn1_long_decode(ctx, end, &l)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - return 0; - } - (*obj)->syntax.l[0] = l; - break; - case SNMP_OCTETSTR: - case SNMP_OPAQUE: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - return 0; - } - memcpy((*obj)->syntax.c, p, len); - kfree(p); - break; - case SNMP_NULL: - case SNMP_NOSUCHOBJECT: - case SNMP_NOSUCHINSTANCE: - case SNMP_ENDOFMIBVIEW: - len = 0; - *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - return 0; - } - if (!asn1_null_decode(ctx, end)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - break; - case SNMP_OBJECTID: - if (!asn1_oid_decode(ctx, end, &lp, &len)) { - kfree(id); - return 0; - } - len *= sizeof(unsigned long); - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(lp); - kfree(id); - return 0; - } - memcpy((*obj)->syntax.ul, lp, len); - 
kfree(lp); - break; - case SNMP_IPADDR: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - if (len != 4) { - kfree(p); - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - return 0; - } - memcpy((*obj)->syntax.uc, p, len); - kfree(p); - break; - case SNMP_COUNTER: - case SNMP_GAUGE: - case SNMP_TIMETICKS: - len = sizeof(unsigned long); - if (!asn1_ulong_decode(ctx, end, &ul)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - return 0; - } - (*obj)->syntax.ul[0] = ul; - break; - default: - kfree(id); - return 0; - } - - (*obj)->syntax_len = len; - (*obj)->type = type; - (*obj)->id = id; - (*obj)->id_len = idlen; - - if (!asn1_eoc_decode(ctx, eoc)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - return 1; -} - -static unsigned char noinline_for_stack -snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request) -{ - unsigned int cls, con, tag; - unsigned char *end; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - - if (!asn1_ulong_decode(ctx, end, &request->id)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - - if (!asn1_uint_decode(ctx, end, &request->error_status)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - - if (!asn1_uint_decode(ctx, end, &request->error_index)) - return 0; - - return 1; -} - -/* - * Fast checksum update for possibly oddly-aligned UDP byte, from the - * code example in the draft. - */ -static void fast_csum(__sum16 *csum, - const unsigned char *optr, - const unsigned char *nptr, - int offset) -{ - unsigned char s[4]; - - if (offset & 1) { - s[0] = ~0; - s[1] = ~*optr; - s[2] = 0; - s[3] = *nptr; - } else { - s[0] = ~*optr; - s[1] = ~0; - s[2] = *nptr; - s[3] = 0; - } - - *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); -} - -/* - * Mangle IP address. 
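
fast_csum() above patches the UDP checksum in place when one address byte changes, handling odd offsets by feeding csum_partial() a padded scratch buffer. The arithmetic underneath is the RFC 1624 incremental update, HC' = ~(~HC + ~m + m'), shown here as a self-contained userspace sketch with arbitrary example words:

    #include <stdio.h>
    #include <stdint.h>

    static uint16_t csum_update(uint16_t check, uint16_t old_word,
                                uint16_t new_word)
    {
            uint32_t sum = (uint16_t)~check;

            sum += (uint16_t)~old_word;
            sum += new_word;
            while (sum >> 16)                       /* fold carries back in */
                    sum = (sum & 0xFFFF) + (sum >> 16);
            return (uint16_t)~sum;
    }

    int main(void)
    {
            /* one address byte 0x0a rewritten to 0xc0 in the high half of a word */
            uint16_t before = 0x1c46;               /* arbitrary starting checksum */
            uint16_t after = csum_update(before, 0x0a00, 0xc000);

            printf("checksum %#06x -> %#06x\n", before, after);
            return 0;
    }
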
- * - begin points to the start of the snmp messgae - * - addr points to the start of the address - */ -static inline void mangle_address(unsigned char *begin, - unsigned char *addr, - const struct oct1_map *map, - __sum16 *check) -{ - if (map->from == NOCT1(addr)) { - u_int32_t old; - - if (debug) - memcpy(&old, addr, sizeof(old)); - - *addr = map->to; - - /* Update UDP checksum if being used */ - if (*check) { - fast_csum(check, - &map->from, &map->to, addr - begin); - - } - - if (debug) - printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", - &old, addr); - } -} - -static unsigned char noinline_for_stack -snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap, - const struct oct1_map *map, - __sum16 *check) -{ - unsigned int cls, con, tag, len; - unsigned char *end; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) - return 0; - - if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len)) - return 0; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_id_free; - - if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) || - (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS))) - goto err_id_free; - - if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len)) - goto err_id_free; - - /* IPv4 only */ - if (len != 4) - goto err_addr_free; - - mangle_address(ctx->begin, ctx->pointer - 4, map, check); - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_addr_free; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - goto err_addr_free; - - if (!asn1_uint_decode(ctx, end, &trap->general)) - goto err_addr_free; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_addr_free; - - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - goto err_addr_free; - - if (!asn1_uint_decode(ctx, end, &trap->specific)) - goto err_addr_free; - - if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) - goto err_addr_free; - - if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) || - (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT))) - goto err_addr_free; - - if (!asn1_ulong_decode(ctx, end, &trap->time)) - goto err_addr_free; - - return 1; - -err_addr_free: - kfree((unsigned long *)trap->ip_address); - -err_id_free: - kfree(trap->id); - - return 0; -} - -/***************************************************************************** - * - * Misc. routines - * - *****************************************************************************/ - -/* - * Parse and mangle SNMP message according to mapping. - * (And this is the fucking 'basic' method). - */ -static int snmp_parse_mangle(unsigned char *msg, - u_int16_t len, - const struct oct1_map *map, - __sum16 *check) -{ - unsigned char *eoc, *end; - unsigned int cls, con, tag, vers, pdutype; - struct asn1_ctx ctx; - struct asn1_octstr comm; - struct snmp_object *obj; - - if (debug > 1) - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1, - msg, len, 0); - - asn1_open(&ctx, msg, len); - - /* - * Start of SNMP message. - */ - if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) - return 0; - if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) - return 0; - - /* - * Version 1 or 2 handled. 
- */ - if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag)) - return 0; - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) - return 0; - if (!asn1_uint_decode (&ctx, end, &vers)) - return 0; - if (debug > 1) - pr_debug("bsalg: snmp version: %u\n", vers + 1); - if (vers > 1) - return 1; - - /* - * Community. - */ - if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag)) - return 0; - if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS) - return 0; - if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len)) - return 0; - if (debug > 1) { - unsigned int i; - - pr_debug("bsalg: community: "); - for (i = 0; i < comm.len; i++) - pr_cont("%c", comm.data[i]); - pr_cont("\n"); - } - kfree(comm.data); - - /* - * PDU type - */ - if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype)) - return 0; - if (cls != ASN1_CTX || con != ASN1_CON) - return 0; - if (debug > 1) { - static const unsigned char *const pdus[] = { - [SNMP_PDU_GET] = "get", - [SNMP_PDU_NEXT] = "get-next", - [SNMP_PDU_RESPONSE] = "response", - [SNMP_PDU_SET] = "set", - [SNMP_PDU_TRAP1] = "trapv1", - [SNMP_PDU_BULK] = "bulk", - [SNMP_PDU_INFORM] = "inform", - [SNMP_PDU_TRAP2] = "trapv2" - }; - - if (pdutype > SNMP_PDU_TRAP2) - pr_debug("bsalg: bad pdu type %u\n", pdutype); - else - pr_debug("bsalg: pdu: %s\n", pdus[pdutype]); - } - if (pdutype != SNMP_PDU_RESPONSE && - pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) - return 1; - - /* - * Request header or v1 trap - */ - if (pdutype == SNMP_PDU_TRAP1) { - struct snmp_v1_trap trap; - unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check); - - if (ret) { - kfree(trap.id); - kfree((unsigned long *)trap.ip_address); - } else - return ret; - - } else { - struct snmp_request req; - - if (!snmp_request_decode(&ctx, &req)) - return 0; - - if (debug > 1) - pr_debug("bsalg: request: id=0x%lx error_status=%u " - "error_index=%u\n", req.id, req.error_status, - req.error_index); - } - - /* - * Loop through objects, look for IP addresses to mangle. - */ - if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) - return 0; - - if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) - return 0; - - while (!asn1_eoc_decode(&ctx, eoc)) { - unsigned int i; - - if (!snmp_object_decode(&ctx, &obj)) { - if (obj) { - kfree(obj->id); - kfree(obj); - } - return 0; - } - - if (debug > 1) { - pr_debug("bsalg: object: "); - for (i = 0; i < obj->id_len; i++) { - if (i > 0) - pr_cont("."); - pr_cont("%lu", obj->id[i]); - } - pr_cont(": type=%u\n", obj->type); - - } - - if (obj->type == SNMP_IPADDR) - mangle_address(ctx.begin, ctx.pointer - 4, map, check); - - kfree(obj->id); - kfree(obj); - } - - if (!asn1_eoc_decode(&ctx, eoc)) - return 0; - - return 1; -} - -/***************************************************************************** - * - * NAT routines. - * - *****************************************************************************/ - -/* - * SNMP translation routine. - */ -static int snmp_translate(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); - u_int16_t udplen = ntohs(udph->len); - u_int16_t paylen = udplen - sizeof(struct udphdr); - int dir = CTINFO2DIR(ctinfo); - struct oct1_map map; - - /* - * Determine mappping for application layer addresses based - * on NAT manipulations for the packet. 
- */ - if (dir == IP_CT_DIR_ORIGINAL) { - /* SNAT traps */ - map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); - map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); - } else { - /* DNAT replies */ - map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip); - map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip); - } - - if (map.from == map.to) - return NF_ACCEPT; - - if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), - paylen, &map, &udph->check)) { - net_warn_ratelimited("bsalg: parser failed\n"); - return NF_DROP; - } - return NF_ACCEPT; -} - -/* We don't actually set up expectations, just adjust internal IP - * addresses if this is being NATted */ -static int help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - int dir = CTINFO2DIR(ctinfo); - unsigned int ret; - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); - - /* SNMP replies and originating SNMP traps get mangled */ - if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) - return NF_ACCEPT; - if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) - return NF_ACCEPT; - - /* No NAT? */ - if (!(ct->status & IPS_NAT_MASK)) - return NF_ACCEPT; - - /* - * Make sure the packet length is ok. So far, we were only guaranteed - * to have a valid length IP header plus 8 bytes, which means we have - * enough room for a UDP header. Just verify the UDP length field so we - * can mess around with the payload. - */ - if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { - net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", - &iph->saddr, &iph->daddr); - return NF_DROP; - } - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, skb); - spin_unlock_bh(&snmp_lock); - return ret; -} - -static const struct nf_conntrack_expect_policy snmp_exp_policy = { - .max_expected = 0, - .timeout = 180, -}; - -static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp_trap", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - -/***************************************************************************** - * - * Module stuff. - * - *****************************************************************************/ - -static int __init nf_nat_snmp_basic_init(void) -{ - BUG_ON(nf_nat_snmp_hook != NULL); - RCU_INIT_POINTER(nf_nat_snmp_hook, help); - - return nf_conntrack_helper_register(&snmp_trap_helper); -} - -static void __exit nf_nat_snmp_basic_fini(void) -{ - RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); - synchronize_rcu(); - nf_conntrack_helper_unregister(&snmp_trap_helper); -} - -module_init(nf_nat_snmp_basic_init); -module_exit(nf_nat_snmp_basic_fini); - -module_param(debug, int, 0600); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c new file mode 100644 index 000000000000..b6e277093e7e --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -0,0 +1,235 @@ +/* + * nf_nat_snmp_basic.c + * + * Basic SNMP Application Layer Gateway + * + * This IP NAT module is intended for use with SNMP network + * discovery and monitoring applications where target networks use + * conflicting private address realms. 
+ * + * Static NAT is used to remap the networks from the view of the network + * management system at the IP layer, and this module remaps some application + * layer addresses to match. + * + * The simplest form of ALG is performed, where only tagged IP addresses + * are modified. The module does not need to be MIB aware and only scans + * messages at the ASN.1/BER level. + * + * Currently, only SNMPv1 and SNMPv2 are supported. + * + * More information on ALG and associated issues can be found in + * RFC 2962 + * + * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory + * McLean & Jochen Friedrich, stripped down for use in the kernel. + * + * Copyright (c) 2000 RP Internet (www.rpi.net.au). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: James Morris <[email protected]> + * + * Copyright (c) 2006-2010 Patrick McHardy <[email protected]> + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/checksum.h> +#include <net/udp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <linux/netfilter/nf_conntrack_snmp.h> +#include "nf_nat_snmp_basic-asn1.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <[email protected]>"); +MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); +MODULE_ALIAS("ip_nat_snmp_basic"); + +#define SNMP_PORT 161 +#define SNMP_TRAP_PORT 162 + +static DEFINE_SPINLOCK(snmp_lock); + +struct snmp_ctx { + unsigned char *begin; + __sum16 *check; + __be32 from; + __be32 to; +}; + +static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) +{ + unsigned char s[12] = {0,}; + int size; + + if (offset & 1) { + memcpy(&s[1], &ctx->from, 4); + memcpy(&s[7], &ctx->to, 4); + s[0] = ~0; + s[1] = ~s[1]; + s[2] = ~s[2]; + s[3] = ~s[3]; + s[4] = ~s[4]; + s[5] = ~0; + size = 12; + } else { + memcpy(&s[0], &ctx->from, 4); + memcpy(&s[4], &ctx->to, 4); + s[0] = ~s[0]; + s[1] = ~s[1]; + s[2] = ~s[2]; + s[3] = ~s[3]; + size = 8; + } + *ctx->check = csum_fold(csum_partial(s, size, + ~csum_unfold(*ctx->check))); +} + +int snmp_version(void *context, size_t hdrlen, unsigned char tag, + const void *data, size_t datalen) +{ + if (*(unsigned char *)data > 1) + return -ENOTSUPP; + return 1; +} + +int snmp_helper(void *context, size_t hdrlen, unsigned char tag, + const void *data, size_t datalen) +{ + struct snmp_ctx *ctx = (struct snmp_ctx *)context; + __be32 *pdata = (__be32 *)data; + + if (*pdata == ctx->from) { + pr_debug("%s: %pI4 to %pI4\n", __func__, + (void *)&ctx->from, (void *)&ctx->to); + + if (*ctx->check) + fast_csum(ctx, (unsigned char *)data - ctx->begin); + *pdata = ctx->to; + } + + return 1; +} + +static int snmp_translate(struct nf_conn *ct, int dir, struct sk_buff *skb) +{ + struct iphdr *iph = 
ip_hdr(skb); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + u16 datalen = ntohs(udph->len) - sizeof(struct udphdr); + char *data = (unsigned char *)udph + sizeof(struct udphdr); + struct snmp_ctx ctx; + int ret; + + if (dir == IP_CT_DIR_ORIGINAL) { + ctx.from = ct->tuplehash[dir].tuple.src.u3.ip; + ctx.to = ct->tuplehash[!dir].tuple.dst.u3.ip; + } else { + ctx.from = ct->tuplehash[!dir].tuple.src.u3.ip; + ctx.to = ct->tuplehash[dir].tuple.dst.u3.ip; + } + + if (ctx.from == ctx.to) + return NF_ACCEPT; + + ctx.begin = (unsigned char *)udph + sizeof(struct udphdr); + ctx.check = &udph->check; + ret = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx, data, datalen); + if (ret < 0) { + nf_ct_helper_log(skb, ct, "parser failed\n"); + return NF_DROP; + } + + return NF_ACCEPT; +} + +/* We don't actually set up expectations, just adjust internal IP + * addresses if this is being NATted + */ +static int help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + int dir = CTINFO2DIR(ctinfo); + unsigned int ret; + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); + + /* SNMP replies and originating SNMP traps get mangled */ + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + return NF_ACCEPT; + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* No NAT? */ + if (!(ct->status & IPS_NAT_MASK)) + return NF_ACCEPT; + + /* Make sure the packet length is ok. So far, we were only guaranteed + * to have a valid length IP header plus 8 bytes, which means we have + * enough room for a UDP header. Just verify the UDP length field so we + * can mess around with the payload. + */ + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { + nf_ct_helper_log(skb, ct, "dropping malformed packet\n"); + return NF_DROP; + } + + if (!skb_make_writable(skb, skb->len)) { + nf_ct_helper_log(skb, ct, "cannot mangle packet"); + return NF_DROP; + } + + spin_lock_bh(&snmp_lock); + ret = snmp_translate(ct, dir, skb); + spin_unlock_bh(&snmp_lock); + return ret; +} + +static const struct nf_conntrack_expect_policy snmp_exp_policy = { + .max_expected = 0, + .timeout = 180, +}; + +static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp_trap", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, +}; + +static int __init nf_nat_snmp_basic_init(void) +{ + BUG_ON(nf_nat_snmp_hook != NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); + + return nf_conntrack_helper_register(&snmp_trap_helper); +} + +static void __exit nf_nat_snmp_basic_fini(void) +{ + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); + nf_conntrack_helper_unregister(&snmp_trap_helper); +} + +module_init(nf_nat_snmp_basic_init); +module_exit(nf_nat_snmp_basic_fini); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index f84c17763f6f..036c074736b0 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -27,40 +27,6 @@ nft_do_chain_arp(void *priv, return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_arp __read_mostly = { - .family = NFPROTO_ARP, - .nhooks = NF_ARP_NUMHOOKS, - .owner = THIS_MODULE, -}; - -static int nf_tables_arp_init_net(struct net *net) -{ - net->nft.arp = kmalloc(sizeof(struct nft_af_info), 
GFP_KERNEL); - if (net->nft.arp== NULL) - return -ENOMEM; - - memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); - - if (nft_register_afinfo(net, net->nft.arp) < 0) - goto err; - - return 0; -err: - kfree(net->nft.arp); - return -ENOMEM; -} - -static void nf_tables_arp_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.arp); - kfree(net->nft.arp); -} - -static struct pernet_operations nf_tables_arp_net_ops = { - .init = nf_tables_arp_init_net, - .exit = nf_tables_arp_exit_net, -}; - static const struct nf_chain_type filter_arp = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -76,22 +42,11 @@ static const struct nf_chain_type filter_arp = { static int __init nf_tables_arp_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_arp); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_arp_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_arp); - - return ret; + return nft_register_chain_type(&filter_arp); } static void __exit nf_tables_arp_exit(void) { - unregister_pernet_subsys(&nf_tables_arp_net_ops); nft_unregister_chain_type(&filter_arp); } @@ -100,4 +55,4 @@ module_exit(nf_tables_arp_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <[email protected]>"); -MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ +MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index f4675253f1e6..96f955496d5f 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -30,40 +30,6 @@ static unsigned int nft_do_chain_ipv4(void *priv, return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_ipv4 __read_mostly = { - .family = NFPROTO_IPV4, - .nhooks = NF_INET_NUMHOOKS, - .owner = THIS_MODULE, -}; - -static int nf_tables_ipv4_init_net(struct net *net) -{ - net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.ipv4 == NULL) - return -ENOMEM; - - memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); - - if (nft_register_afinfo(net, net->nft.ipv4) < 0) - goto err; - - return 0; -err: - kfree(net->nft.ipv4); - return -ENOMEM; -} - -static void nf_tables_ipv4_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.ipv4); - kfree(net->nft.ipv4); -} - -static struct pernet_operations nf_tables_ipv4_net_ops = { - .init = nf_tables_ipv4_init_net, - .exit = nf_tables_ipv4_exit_net, -}; - static const struct nf_chain_type filter_ipv4 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -85,22 +51,11 @@ static const struct nf_chain_type filter_ipv4 = { static int __init nf_tables_ipv4_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_ipv4); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_ipv4); - - return ret; + return nft_register_chain_type(&filter_ipv4); } static void __exit nf_tables_ipv4_exit(void) { - unregister_pernet_subsys(&nf_tables_ipv4_net_ops); nft_unregister_chain_type(&filter_ipv4); } @@ -109,4 +64,4 @@ module_exit(nf_tables_ipv4_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <[email protected]>"); -MODULE_ALIAS_NFT_FAMILY(AF_INET); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter"); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 8322f26e770e..785712be5b0d 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -766,7 +766,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = 
after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { + (rs->rtt_us <= bbr->min_rtt_us || + (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ff71b18d9682..cfa51cfd2d99 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -97,6 +97,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ +#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -2857,11 +2858,18 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, *rexmit = REXMIT_LOST; } -static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) +static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; struct tcp_sock *tp = tcp_sk(sk); + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { + /* If the remote keeps returning delayed ACKs, eventually + * the min filter would pick it up and overestimate the + * prop. delay when it expires. Skip suspected delayed ACKs. + */ + return; + } minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? : jiffies_to_usecs(1)); } @@ -2901,7 +2909,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, * always taken together with ACK, SACK, or TS-opts. Any negative * values will be skipped with the seq_rtt_us < 0 check above. */ - tcp_update_rtt_min(sk, ca_rtt_us); + tcp_update_rtt_min(sk, ca_rtt_us, flag); tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); @@ -3125,6 +3133,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); + + if (pkts_acked == 1 && last_in_flight < tp->mss_cache && + last_in_flight && !prior_sacked && fully_acked && + sack->rate->prior_delivered + 1 == tp->delivered && + !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) { + /* Conservatively mark a delayed ACK. It's typically + * from a lone runt packet over the round trip to + * a receiver w/o out-of-order or CE events. 
+ */ + flag |= FLAG_ACK_MAYBE_DELAYED; + } } if (sack->first_sackt) { sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt); @@ -3614,6 +3633,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ + rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b6a2aa1dcf56..4d58e2ce0b5b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 01801b77bd0d..ea6e6e7df0ee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 8affc6d83d58..63faeee989a9 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 44d109c435bc..3fd1ec775dc2 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -145,6 +145,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, if (!xo) return ERR_PTR(-EINVAL); + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + return ERR_PTR(-EINVAL); + x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b7c4befe67ec..92b8d8c75eed 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1223,8 +1223,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } if (!rcu_access_pointer(fn->leaf)) { - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(fn->leaf, rt); + if (fn->fn_flags & RTN_TL_ROOT) { + /* put back null_entry for root node */ + rcu_assign_pointer(fn->leaf, + info->nl_net->ipv6.ip6_null_entry); + } else { + atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(fn->leaf, rt); + } } fn = sn; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index db99446e0276..a88480193d77 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -352,11 +352,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, 1); if (register_netdevice(dev) < 0) goto failed_free; + ip6gre_tnl_link_config(nt, 1); + /* Can use a lockless transmit, unless we generate output sequences */ if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; @@ -1709,7 +1710,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { - struct ip6_tnl *tunnel; int ret; ret = ip6gre_tunnel_init_common(dev); 
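The tcp_input.c hunks above and the tcp_bbr.c change before them implement the same policy at two levels: an ACK that was probably delayed by the receiver carries an inflated RTT sample, so it may lower a windowed minimum but must not refresh an expired one. A minimal standalone sketch of that filter, using illustrative field names rather than the kernel's minmax API:

#include <stdbool.h>
#include <stdint.h>

struct min_rtt_filter {
	uint32_t min_rtt_us;	/* current windowed minimum */
	uint32_t stamp;		/* when the minimum was taken */
	uint32_t window;	/* filter window, same time units */
};

/* Feed one RTT sample. Suspected delayed ACKs may still lower the
 * minimum, but cannot replace an expired minimum with a larger value,
 * mirroring tcp_update_rtt_min() and bbr_update_min_rtt(). */
static void min_rtt_update(struct min_rtt_filter *f, uint32_t now,
			   uint32_t rtt_us, bool maybe_delayed)
{
	bool expired = now - f->stamp > f->window;

	if (maybe_delayed && rtt_us > f->min_rtt_us)
		return;		/* skip the inflated sample */

	if (rtt_us <= f->min_rtt_us || expired) {
		f->min_rtt_us = rtt_us;
		f->stamp = now;
	}
}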
@@ -1718,10 +1718,6 @@ static int ip6gre_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - tunnel = netdev_priv(dev); - - ip6gre_tnl_link_config(tunnel, 1); - return 0; } @@ -1872,12 +1868,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); err = register_netdevice(dev); if (err) goto out; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + if (tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + dev_hold(dev); ip6gre_tunnel_link(ign, nt); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a4a94452132b..997c7f19ad62 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -174,7 +174,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) { if (!np->autoflowlabel_set) return ip6_default_np_autolabel(net); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2d4680e0376f..e8ffb5b5d84e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1336,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = np->autoflowlabel; + val = ip6_autoflowlabel(sock_net(sk), np); break; case IPV6_RECVFRAGSIZE: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 40b223a930a3..6a5d0e39bb87 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1655,8 +1655,6 @@ static void mld_sendpack(struct sk_buff *skb) if (err) goto err_out; - payload_len = skb->len; - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, net->ipv6.igmp_sk, skb, NULL, skb->dev, dst_output); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 806e95375ec8..4a634b7a2c80 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -72,8 +72,9 @@ endif # NF_TABLES_IPV6 endif # NF_TABLES config NF_FLOW_TABLE_IPV6 - select NF_FLOW_TABLE tristate "Netfilter flow table IPv6 module" + depends on NF_CONNTRACK && NF_TABLES + select NF_FLOW_TABLE help This option adds the flow table IPv6 support. @@ -240,6 +241,15 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_SRH + tristate '"srh" Segment Routing header match support' + depends on NETFILTER_ADVANCED + help + srh matching allows you to match packets based on the segment + routing header of the packet. + + To compile it as a module, choose M here. If unsure, say N. 
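The ip6t_srh.c match added just below tests every SRH field through the same invertible predicate: a bit in mt_flags enables a comparison and the corresponding bit in mt_invflags flips its sense, which is exactly what the XOR in NF_SRH_INVF() computes. A tiny self-contained illustration of the idiom (the flag constant here is made up for the example, not the kernel's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EX_INV_LEN	0x1	/* hypothetical "invert the test" flag */

/* Mirror of NF_SRH_INVF(): returns true when the (possibly inverted)
 * comparison fails, i.e. when the packet should not match. */
static bool inv_check(uint8_t invflags, uint8_t flag, bool mismatch)
{
	return mismatch ^ !!(invflags & flag);
}

int main(void)
{
	uint8_t hdrlen = 6, wanted = 6;

	/* plain test: fails only when the lengths differ; prints 0 */
	printf("%d\n", inv_check(0, EX_INV_LEN, hdrlen != wanted));
	/* inverted test: fails when the lengths are equal; prints 1 */
	printf("%d\n", inv_check(EX_INV_LEN, EX_INV_LEN, hdrlen != wanted));
	return 0;
}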
+ # The targets config IP6_NF_TARGET_HL tristate '"HL" hoplimit target support' diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 95611c4b39b0..d984057b8395 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o +obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6ebbef2dfb60..37fa76ee5130 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1952,7 +1952,6 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err5; - pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; err5: diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c new file mode 100644 index 000000000000..9642164107ce --- /dev/null +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -0,0 +1,161 @@ +/* Kernel module to match Segment Routing Header (SRH) parameters. */ + +/* Author: + * Ahmed Abdelsalam <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <net/ipv6.h> +#include <net/seg6.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv6/ip6t_srh.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +/* Test a struct->mt_invflags and a boolean for inequality */ +#define NF_SRH_INVF(ptr, flag, boolean) \ + ((boolean) ^ !!((ptr)->mt_invflags & (flag))) + +static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ip6t_srh *srhinfo = par->matchinfo; + struct ipv6_sr_hdr *srh; + struct ipv6_sr_hdr _srh; + int hdrlen, srhoff = 0; + + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return false; + srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh); + if (!srh) + return false; + + hdrlen = ipv6_optlen(srh); + if (skb->len - srhoff < hdrlen) + return false; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (srh->segments_left > srh->first_segment) + return false; + + /* Next Header matching */ + if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR, + !(srh->nexthdr == srhinfo->next_hdr))) + return false; + + /* Header Extension Length matching */ + if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ, + !(srh->hdrlen == srhinfo->hdr_len))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LEN_GT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT, + !(srh->hdrlen > srhinfo->hdr_len))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LEN_LT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT, + !(srh->hdrlen < srhinfo->hdr_len))) + return false; + + /* Segments Left matching */ + if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ, + !(srh->segments_left == srhinfo->segs_left))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT, + 
!(srh->segments_left > srhinfo->segs_left))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT, + !(srh->segments_left < srhinfo->segs_left))) + return false; + + /** + * Last Entry matching + * Last_Entry field was introduced in revision 6 of the SRH draft. + * It was called First_Segment in the previous revision + */ + if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ, + !(srh->first_segment == srhinfo->last_entry))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LAST_GT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT, + !(srh->first_segment > srhinfo->last_entry))) + return false; + + if (srhinfo->mt_flags & IP6T_SRH_LAST_LT) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT, + !(srh->first_segment < srhinfo->last_entry))) + return false; + + /** + * Tag matching + * Tag field was introduced in revision 6 of the SRH draft. + */ + if (srhinfo->mt_flags & IP6T_SRH_TAG) + if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG, + !(srh->tag == srhinfo->tag))) + return false; + return true; +} + +static int srh_mt6_check(const struct xt_mtchk_param *par) +{ + const struct ip6t_srh *srhinfo = par->matchinfo; + + if (srhinfo->mt_flags & ~IP6T_SRH_MASK) { + pr_err("unknown srh match flags %X\n", srhinfo->mt_flags); + return -EINVAL; + } + + if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) { + pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags); + return -EINVAL; + } + + return 0; +} + +static struct xt_match srh_mt6_reg __read_mostly = { + .name = "srh", + .family = NFPROTO_IPV6, + .match = srh_mt6, + .matchsize = sizeof(struct ip6t_srh), + .checkentry = srh_mt6_check, + .me = THIS_MODULE, +}; + +static int __init srh_mt6_init(void) +{ + return xt_register_match(&srh_mt6_reg); +} + +static void __exit srh_mt6_exit(void) +{ + xt_unregister_match(&srh_mt6_reg); +} + +module_init(srh_mt6_init); +module_exit(srh_mt6_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match"); +MODULE_AUTHOR("Ahmed Abdelsalam <[email protected]>"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index d4bc56443dc1..710fa0806c37 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -3,6 +3,7 @@ * * Copyright (C) 2003 Jozsef Kadlecsik <[email protected]> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> @@ -11,6 +12,10 @@ static int __net_init ip6table_raw_table_init(struct net *net); +static bool raw_before_defrag __read_mostly; +MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); +module_param(raw_before_defrag, bool, 0000); + static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -20,6 +25,15 @@ static const struct xt_table packet_raw = { .table_init = ip6table_raw_table_init, }; +static const struct xt_table packet_raw_before_defrag = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, + .table_init = ip6table_raw_table_init, +}; + /* The work comes in here from netfilter.c.
*/ static unsigned int ip6table_raw_hook(void *priv, struct sk_buff *skb, @@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; + const struct xt_table *table = &packet_raw; int ret; + if (raw_before_defrag) + table = &packet_raw_before_defrag; + if (net->ipv6.ip6table_raw) return 0; - repl = ip6t_alloc_initial_table(&packet_raw); + repl = ip6t_alloc_initial_table(table); if (repl == NULL) return -ENOMEM; - ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops, + ret = ip6t_register_table(net, table, repl, rawtable_ops, &net->ipv6.ip6table_raw); kfree(repl); return ret; @@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = { static int __init ip6table_raw_init(void) { int ret; + const struct xt_table *table = &packet_raw; + + if (raw_before_defrag) { + table = &packet_raw_before_defrag; + + pr_info("Enabling raw table before defrag\n"); + } /* Register hooks */ - rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 977d8900cfd1..ce53dcfda88a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, if ((unsigned int)end > IPV6_MAXPLEN) { pr_debug("offset is too large.\n"); - return -1; + return -EINVAL; } ecn = ip6_frag_ecn(ipv6_hdr(skb)); @@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * this case. -DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); - return -1; + return -EPROTO; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ @@ -358,7 +358,7 @@ found: discard_fq: inet_frag_kill(&fq->q, &nf_frags); err: - return -1; + return -EINVAL; } /* @@ -567,6 +567,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { + u16 savethdr = skb->transport_header; struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; @@ -600,8 +601,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { - ret = -EINVAL; + ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); + if (ret < 0) { + if (ret == -EPROTO) { + skb->transport_header = savethdr; + ret = 0; + } goto out_unlock; } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index b326da59257f..c87b48359e8f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv, /* Previously seen (loopback)? 
*/ if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; + + if (skb->_nfct == IP_CT_UNTRACKED) + return NF_ACCEPT; #endif err = nf_ct_frag6_gather(state->net, skb, diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index 0c3b9d32f64f..fff21602875a 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -5,7 +5,6 @@ #include <linux/rhashtable.h> #include <linux/ipv6.h> #include <linux/netdevice.h> -#include <linux/ipv6.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/neighbour.h> diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 9cd45b964123..17e03589331c 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -28,40 +28,6 @@ static unsigned int nft_do_chain_ipv6(void *priv, return nft_do_chain(&pkt, priv); } -static struct nft_af_info nft_af_ipv6 __read_mostly = { - .family = NFPROTO_IPV6, - .nhooks = NF_INET_NUMHOOKS, - .owner = THIS_MODULE, -}; - -static int nf_tables_ipv6_init_net(struct net *net) -{ - net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); - if (net->nft.ipv6 == NULL) - return -ENOMEM; - - memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); - - if (nft_register_afinfo(net, net->nft.ipv6) < 0) - goto err; - - return 0; -err: - kfree(net->nft.ipv6); - return -ENOMEM; -} - -static void nf_tables_ipv6_exit_net(struct net *net) -{ - nft_unregister_afinfo(net, net->nft.ipv6); - kfree(net->nft.ipv6); -} - -static struct pernet_operations nf_tables_ipv6_net_ops = { - .init = nf_tables_ipv6_init_net, - .exit = nf_tables_ipv6_exit_net, -}; - static const struct nf_chain_type filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, @@ -83,22 +49,11 @@ static const struct nf_chain_type filter_ipv6 = { static int __init nf_tables_ipv6_init(void) { - int ret; - - ret = nft_register_chain_type(&filter_ipv6); - if (ret < 0) - return ret; - - ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); - if (ret < 0) - nft_unregister_chain_type(&filter_ipv6); - - return ret; + return nft_register_chain_type(&filter_ipv6); } static void __exit nf_tables_ipv6_exit(void) { - unregister_pernet_subsys(&nf_tables_ipv6_net_ops); nft_unregister_chain_type(&filter_ipv6); } @@ -107,4 +62,4 @@ module_exit(nf_tables_ipv6_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <[email protected]>"); -MODULE_ALIAS_NFT_FAMILY(AF_INET6); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter"); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f85da2f1e729..aa4411c81e7e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2734,6 +2734,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!dev) goto out; + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + if (!ipv6_addr_any(&cfg->fc_prefsrc)) { if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, { struct tcphdr *th; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 
a0f89ad76f9d..2a04dc9c781b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4e12859bc2ee..bb935a3b7fea 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d4e98f20fc2a..4a8d407f8902 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; - /* We must prevent loops or risk deadlock ! */ - if (csk->sk_family == PF_KCM) + /* Only allow TCP sockets to be attached for now */ + if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || + csk->sk_protocol != IPPROTO_TCP) + return -EOPNOTSUPP; + + /* Don't allow listeners or closed sockets */ + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) return -EOPNOTSUPP; psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); @@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, return err; } - sock_hold(csk); - write_lock_bh(&csk->sk_callback_lock); + + /* Check if sk_user_data is already in use by KCM or someone else. + * Must be done under lock to prevent race conditions. + */ + if (csk->sk_user_data) { + write_unlock_bh(&csk->sk_callback_lock); + strp_done(&psock->strp); + kmem_cache_free(kcm_psockp, psock); + return -EALREADY; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; @@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, csk->sk_data_ready = psock_data_ready; csk->sk_write_space = psock_write_space; csk->sk_state_change = psock_state_change; + write_unlock_bh(&csk->sk_callback_lock); + sock_hold(csk); + /* Finished initialization, now add the psock to the MUX. */ spin_lock_bh(&mux->lock); head = &mux->psocks; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 62285fc6eb59..194a7483bb93 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -730,11 +730,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: recv data ns=%u, session nr=%u\n", session->name, ns, session->nr); } + ptr += 4; } - /* Advance past L2-specific header, if present */ - ptr += session->l2specific_len; - if (L2TP_SKB_CB(skb)->has_seq) { /* Received a packet with sequence numbers.
If we're the LNS, * check if we are sending sequence numbers and if not, @@ -1048,21 +1046,20 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) memcpy(bufp, &session->cookie[0], session->cookie_len); bufp += session->cookie_len; } - if (session->l2specific_len) { - if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = 0; - if (session->send_seq) { - l2h = 0x40000000 | session->ns; - session->ns++; - session->ns &= 0xffffff; - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: updated ns to %u\n", - session->name, session->ns); - } + if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { + u32 l2h = 0; - *((__be32 *) bufp) = htonl(l2h); + if (session->send_seq) { + l2h = 0x40000000 | session->ns; + session->ns++; + session->ns &= 0xffffff; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: updated ns to %u\n", + session->name, session->ns); } - bufp += session->l2specific_len; + + *((__be32 *)bufp) = htonl(l2h); + bufp += 4; } return bufp - optr; @@ -1719,7 +1716,7 @@ int l2tp_session_delete(struct l2tp_session *session) EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or - * l2specific_len parameters are set. + * l2specific_type parameters are set. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version) { @@ -1728,7 +1725,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->send_seq) session->hdr_len += 4; } else { - session->hdr_len = 4 + session->cookie_len + session->l2specific_len; + session->hdr_len = 4 + session->cookie_len; + session->hdr_len += l2tp_get_l2specific_len(session); if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } @@ -1779,7 +1777,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->lns_mode = cfg->lns_mode; session->reorder_timeout = cfg->reorder_timeout; session->l2specific_type = cfg->l2specific_type; - session->l2specific_len = cfg->l2specific_len; session->cookie_len = cfg->cookie_len; memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len); session->peer_cookie_len = cfg->peer_cookie_len; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index c2e9bbd79b35..9bbee90e9963 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -59,7 +59,6 @@ struct l2tp_session_cfg { int debug; /* bitmask of debug message * categories */ u16 vlan_id; /* VLAN pseudowire only */ - u16 l2specific_len; /* Layer 2 specific length */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ int cookie_len; /* 0, 4 or 8 bytes */ @@ -85,7 +84,6 @@ struct l2tp_session { int cookie_len; u8 peer_cookie[8]; int peer_cookie_len; - u16 l2specific_len; u16 l2specific_type; u16 hdr_len; u32 nr; /* session NR state (receive) */ @@ -302,6 +300,17 @@ static inline void l2tp_session_dec_refcount(struct l2tp_session *session) l2tp_session_free(session); } +static inline int l2tp_get_l2specific_len(struct l2tp_session *session) +{ + switch (session->l2specific_type) { + case L2TP_L2SPECTYPE_DEFAULT: + return 4; + case L2TP_L2SPECTYPE_NONE: + default: + return 0; + } +} + #define l2tp_printk(ptr, type, func, fmt, ...)
\ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2c30587d1a14..72e713da4733 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -181,7 +181,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) session->debug, jiffies_to_msecs(session->reorder_timeout)); seq_printf(m, " offset 0 l2specific %hu/%hu\n", - session->l2specific_type, session->l2specific_len); + session->l2specific_type, l2tp_get_l2specific_len(session)); if (session->cookie_len) { seq_printf(m, " cookie %02x%02x%02x%02x", session->cookie[0], session->cookie[1], diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index e1ca29f79821..e7ea9c4b89ff 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -550,13 +550,16 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_DATA_SEQ]) cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]); - cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT; - if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) + if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) { cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]); - - cfg.l2specific_len = 4; - if (info->attrs[L2TP_ATTR_L2SPEC_LEN]) - cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]); + if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT && + cfg.l2specific_type != L2TP_L2SPECTYPE_NONE) { + ret = -EINVAL; + goto out_tunnel; + } + } else { + cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT; + } if (info->attrs[L2TP_ATTR_COOKIE]) { u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); @@ -617,27 +620,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf goto out_tunnel; } - /* Check that pseudowire-specific params are present */ - switch (cfg.pw_type) { - case L2TP_PWTYPE_NONE: - break; - case L2TP_PWTYPE_ETH_VLAN: - if (!info->attrs[L2TP_ATTR_VLAN_ID]) { - ret = -EINVAL; - goto out_tunnel; - } - break; - case L2TP_PWTYPE_ETH: - break; - case L2TP_PWTYPE_PPP: - case L2TP_PWTYPE_PPP_AC: - break; - case L2TP_PWTYPE_IP: - default: - ret = -EPROTONOSUPPORT; - break; - } - ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel, session_id, peer_session_id, diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b15412c21ac9..444ea8d127fe 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -420,7 +420,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, default: p += scnprintf(p, sizeof(buf) + buf - p, "\t\tMAX-MPDU-UNKNOWN\n"); - }; + } switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case 0: p += scnprintf(p, sizeof(buf) + buf - p, @@ -438,7 +438,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "\t\tUNKNOWN-MHZ: 0x%x\n", (vhtc->cap >> 2) & 0x3); - }; + } PFLAG(RXLDPC, "RXLDPC"); PFLAG(SHORT_GI_80, "SHORT-GI-80"); PFLAG(SHORT_GI_160, "SHORT-GI-160"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0ee0fcf3abbf..9019fa98003d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -506,7 +506,7 @@ config NFT_CT connection tracking information such as the flow state. 
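Returning to the l2tp hunks above: the stored l2specific_len field is gone, and the length of the L2-specific sublayer is now derived from its type, with the netlink handler rejecting any type other than "default" and "none". For the default L2TPv3 sublayer that length is a single 32-bit word carrying the S bit (0x40000000) and a 24-bit send sequence number. A standalone sketch of the construction done by l2tp_build_l2tpv3_header(), with illustrative enum and helper names rather than the kernel's:

#include <arpa/inet.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

enum l2spec_type { L2SPEC_NONE, L2SPEC_DEFAULT };

/* Length is a function of the type, as in l2tp_get_l2specific_len(). */
static size_t l2spec_len(enum l2spec_type type)
{
	return type == L2SPEC_DEFAULT ? 4 : 0;
}

/* Append the default L2-specific sublayer: one 32-bit word with the
 * S bit set when sequencing is on and the low 24 bits carrying the
 * send sequence number. Returns the number of bytes written. */
static size_t l2spec_put(uint8_t *bufp, enum l2spec_type type,
			 int send_seq, uint32_t *ns)
{
	uint32_t l2h = 0;

	if (type != L2SPEC_DEFAULT)
		return 0;
	if (send_seq) {
		l2h = 0x40000000u | *ns;	/* S bit + 24-bit sequence */
		*ns = (*ns + 1) & 0xffffffu;
	}
	l2h = htonl(l2h);
	memcpy(bufp, &l2h, sizeof(l2h));
	return l2spec_len(type);
}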
config NFT_FLOW_OFFLOAD - depends on NF_CONNTRACK + depends on NF_CONNTRACK && NF_FLOW_TABLE tristate "Netfilter nf_tables hardware flow offload module" help This option adds the "flow_offload" expression that you can use to @@ -665,8 +665,9 @@ endif # NF_TABLES_NETDEV endif # NF_TABLES config NF_FLOW_TABLE_INET - select NF_FLOW_TABLE tristate "Netfilter flow table mixed IPv4/IPv6 module" + depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6 + select NF_FLOW_TABLE help This option adds the flow table mixed IPv4/IPv6 support. @@ -674,6 +675,7 @@ config NF_FLOW_TABLE_INET config NF_FLOW_TABLE tristate "Netfilter flow table module" + depends on NF_CONNTRACK && NF_TABLES help This option adds the flow table core infrastructure. diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 997dd387d259..0f6b8172fb9a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -140,7 +140,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, if (reg->nat_hook && orig_ops[i]->nat_hook) { kvfree(new); - return ERR_PTR(-EEXIST); + return ERR_PTR(-EBUSY); } if (inserted || reg->priority > orig_ops[i]->priority) { @@ -377,8 +377,8 @@ static void nf_remove_net_hook(struct nf_hook_entries *old, } } -void __nf_unregister_net_hook(struct net *net, int pf, - const struct nf_hook_ops *reg) +static void __nf_unregister_net_hook(struct net *net, int pf, + const struct nf_hook_ops *reg) { struct nf_hook_entries __rcu **pp; struct nf_hook_entries *p; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 728bf31bb386..975a85a48d39 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2122,7 +2122,6 @@ ip_set_init(void) return ret; } - pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -2138,3 +2137,5 @@ ip_set_fini(void) module_init(ip_set_init); module_exit(ip_set_fini); + +MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL)); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 299edc6add5a..1c98c907bc63 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -595,7 +595,6 @@ static int ip_vs_app_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_app_fops = { - .owner = THIS_MODULE, .open = ip_vs_app_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index f489b8db2406..370abbf6f421 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1143,7 +1143,6 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_conn_fops = { - .owner = THIS_MODULE, .open = ip_vs_conn_open, .read = seq_read, .llseek = seq_lseek, @@ -1221,7 +1220,6 @@ static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_conn_sync_fops = { - .owner = THIS_MODULE, .open = ip_vs_conn_sync_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fff213eacf2a..5ebde4b15810 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2116,7 +2116,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_info_fops = { - .owner = THIS_MODULE, .open = ip_vs_info_open, .read = seq_read, .llseek = seq_lseek, @@ -2161,7 +2160,6 @@ static int ip_vs_stats_seq_open(struct inode *inode, 
struct file *file) } static const struct file_operations ip_vs_stats_fops = { - .owner = THIS_MODULE, .open = ip_vs_stats_seq_open, .read = seq_read, .llseek = seq_lseek, @@ -2230,7 +2228,6 @@ static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) } static const struct file_operations ip_vs_stats_percpu_fops = { - .owner = THIS_MODULE, .open = ip_vs_stats_percpu_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index a95518261168..6d65389e308f 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -71,7 +71,7 @@ static inline bool already_closed(const struct nf_conn *conn) return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; else - return 0; + return false; } static int key_diff(const u32 *a, const u32 *b, unsigned int klen) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6a64d528d076..3d72a0842c01 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -58,8 +58,6 @@ #include "nf_internals.h" -#define NF_CONNTRACK_VERSION "0.5.0" - int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) __read_mostly; @@ -2068,10 +2066,6 @@ int nf_conntrack_init_start(void) if (!nf_conntrack_cachep) goto err_cachep; - printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", - NF_CONNTRACK_VERSION, nf_conntrack_htable_size, - nf_conntrack_max); - ret = nf_conntrack_expect_init(); if (ret < 0) goto err_expect; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index d6748a8a79c5..8ef21d9f9a00 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -649,7 +649,6 @@ static int exp_open(struct inode *inode, struct file *file) } static const struct file_operations exp_file_ops = { - .owner = THIS_MODULE, .open = exp_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7c7921a53b13..dd177ebee9aa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -57,8 +57,6 @@ MODULE_LICENSE("GPL"); -static char __initdata version[] = "0.93"; - static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *l4proto) @@ -3425,7 +3423,6 @@ static int __init ctnetlink_init(void) { int ret; - pr_info("ctnetlink v%s: registering with nfnetlink.\n", version); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { pr_err("ctnetlink_init: cannot register with nfnetlink.\n"); @@ -3459,8 +3456,6 @@ err_out: static void __exit ctnetlink_exit(void) { - pr_info("ctnetlink: unregistering from nfnetlink.\n"); - unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46d32baad095..9123fdec5e14 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -382,7 +382,6 @@ static int ct_open(struct inode *inode, struct file *file) } static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, .open = ct_open, .read = seq_read, .llseek = seq_lseek, @@ -475,7 +474,6 @@ static int ct_cpu_seq_open(struct inode *inode, struct file 
*file) } static const struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb152a7cca4..c2c1b16b7538 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -402,7 +402,6 @@ static int nflog_open(struct inode *inode, struct file *file) } static const struct file_operations nflog_file_ops = { - .owner = THIS_MODULE, .open = nflog_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 7f55af5f3d1a..d67a96a25a68 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -15,8 +15,6 @@ #include <linux/netfilter_bridge.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv6.h> #include <net/protocol.h> #include <net/netfilter/nf_queue.h> #include <net/dst.h> diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 49bd8bb16b18..92139a087260 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -317,7 +317,6 @@ static int synproxy_cpu_seq_open(struct inode *inode, struct file *file) } static const struct file_operations synproxy_cpu_seq_fops = { - .owner = THIS_MODULE, .open = synproxy_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 336b81689ac9..0791813a1e7d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -26,86 +26,19 @@ static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); - -/** - * nft_register_afinfo - register nf_tables address family info - * - * @afi: address family info to register - * - * Register the address family for use with nf_tables. Returns zero on - * success or a negative errno code otherwise. - */ -int nft_register_afinfo(struct net *net, struct nft_af_info *afi) -{ - INIT_LIST_HEAD(&afi->tables); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&afi->list, &net->nft.af_info); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - return 0; -} -EXPORT_SYMBOL_GPL(nft_register_afinfo); - -static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi); - -/** - * nft_unregister_afinfo - unregister nf_tables address family info - * - * @afi: address family info to unregister - * - * Unregister the address family for use with nf_tables. 
- */ -void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi) -{ - nfnl_lock(NFNL_SUBSYS_NFTABLES); - __nft_release_afinfo(net, afi); - list_del_rcu(&afi->list); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); -} -EXPORT_SYMBOL_GPL(nft_unregister_afinfo); - -static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) -{ - struct nft_af_info *afi; - - list_for_each_entry(afi, &net->nft.af_info, list) { - if (afi->family == family) - return afi; - } - return NULL; -} - -static struct nft_af_info * -nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) -{ - struct nft_af_info *afi; - - afi = nft_afinfo_lookup(net, family); - if (afi != NULL) - return afi; -#ifdef CONFIG_MODULES - if (autoload) { - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-afinfo-%u", family); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - afi = nft_afinfo_lookup(net, family); - if (afi != NULL) - return ERR_PTR(-EAGAIN); - } -#endif - return ERR_PTR(-EAFNOSUPPORT); -} +static u64 table_handle; static void nft_ctx_init(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - struct nft_af_info *afi, + u8 family, struct nft_table *table, struct nft_chain *chain, const struct nlattr * const *nla) { ctx->net = net; - ctx->afi = afi; + ctx->family = family; ctx->table = table; ctx->chain = chain; ctx->nla = nla; @@ -385,30 +318,61 @@ static int nft_delflowtable(struct nft_ctx *ctx, * Tables */ -static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, +static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, - u8 genmask) + u8 family, u8 genmask) { struct nft_table *table; - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(table, &net->nft.tables, list) { if (!nla_strcmp(nla, table->name) && + table->family == family && + nft_active_genmask(table, genmask)) + return table; + } + return NULL; +} + +static struct nft_table *nft_table_lookup_byhandle(const struct net *net, + const struct nlattr *nla, + u8 genmask) +{ + struct nft_table *table; + + list_for_each_entry(table, &net->nft.tables, list) { + if (be64_to_cpu(nla_get_be64(nla)) == table->handle && nft_active_genmask(table, genmask)) return table; } return NULL; } -static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, +static struct nft_table *nf_tables_table_lookup(const struct net *net, const struct nlattr *nla, - u8 genmask) + u8 family, u8 genmask) { struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - table = nft_table_lookup(afi, nla, genmask); + table = nft_table_lookup(net, nla, family, genmask); + if (table != NULL) + return table; + + return ERR_PTR(-ENOENT); +} + +static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net, + const struct nlattr *nla, + u8 genmask) +{ + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup_byhandle(net, nla, genmask); if (table != NULL) return table; @@ -423,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; static const struct nf_chain_type * -__nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) { int i; @@ -436,22 +400,20 @@ __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) } static const struct nf_chain_type * -nf_tables_chain_type_lookup(const struct nft_af_info *afi, - const 
struct nlattr *nla, - bool autoload) +nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload) { const struct nf_chain_type *type; - type = __nf_tables_chain_type_lookup(afi->family, nla); + type = __nf_tables_chain_type_lookup(nla, family); if (type != NULL) return type; #ifdef CONFIG_MODULES if (autoload) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-chain-%u-%.*s", afi->family, + request_module("nft-chain-%u-%.*s", family, nla_len(nla), (const char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - type = __nf_tables_chain_type_lookup(afi->family, nla); + type = __nf_tables_chain_type_lookup(nla, family); if (type != NULL) return ERR_PTR(-EAGAIN); } @@ -463,6 +425,7 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -484,7 +447,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || - nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || + nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), + NFTA_TABLE_PAD)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -509,7 +474,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->afi->family, ctx->table); + event, 0, ctx->family, ctx->table); if (err < 0) { kfree_skb(skb); goto err; @@ -526,7 +491,6 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); @@ -535,30 +499,27 @@ static int nf_tables_dump_tables(struct sk_buff *skb, rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (!nft_is_active(net, table)) - continue; - if (nf_tables_fill_table_info(skb, net, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWTABLE, - NLM_F_MULTI, - afi->family, table) < 0) - goto done; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, table)) + continue; + if (nf_tables_fill_table_info(skb, net, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, NLM_F_MULTI, + table->family, table) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } done: rcu_read_unlock(); @@ -573,7 +534,6 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_af_info *afi; const struct 
nft_table *table; struct sk_buff *skb2; int family = nfmsg->nfgen_family; @@ -586,11 +546,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -611,10 +568,7 @@ err: return err; } -static void _nf_tables_table_disable(struct net *net, - const struct nft_af_info *afi, - struct nft_table *table, - u32 cnt) +static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) { struct nft_chain *chain; u32 i = 0; @@ -632,9 +586,7 @@ static void _nf_tables_table_disable(struct net *net, } } -static int nf_tables_table_enable(struct net *net, - const struct nft_af_info *afi, - struct nft_table *table) +static int nf_tables_table_enable(struct net *net, struct nft_table *table) { struct nft_chain *chain; int err, i = 0; @@ -654,15 +606,13 @@ static int nf_tables_table_enable(struct net *net, return 0; err: if (i) - _nf_tables_table_disable(net, afi, table, i); + nft_table_disable(net, table, i); return err; } -static void nf_tables_table_disable(struct net *net, - const struct nft_af_info *afi, - struct nft_table *table) +static void nf_tables_table_disable(struct net *net, struct nft_table *table) { - _nf_tables_table_disable(net, afi, table, 0); + nft_table_disable(net, table, 0); } static int nf_tables_updtable(struct nft_ctx *ctx) @@ -691,7 +641,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table); + ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; nft_trans_table_enable(trans) = true; @@ -716,19 +666,14 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); const struct nlattr *name; - struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; u32 flags = 0; struct nft_ctx ctx; int err; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name, genmask); + table = nf_tables_table_lookup(net, name, family, genmask); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -738,7 +683,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); return nf_tables_updtable(&ctx); } @@ -748,39 +693,35 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, return -EINVAL; } - err = -EAFNOSUPPORT; - if (!try_module_get(afi->owner)) - goto err1; - err = -ENOMEM; table = kzalloc(sizeof(*table), GFP_KERNEL); if (table == NULL) - goto err2; + goto err_kzalloc; table->name = nla_strdup(name, GFP_KERNEL); if (table->name == NULL) - goto err3; + goto err_strdup; INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); INIT_LIST_HEAD(&table->flowtables); + table->family = family; table->flags = flags; + 
table->handle = ++table_handle; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) - goto err4; + goto err_trans; - list_add_tail_rcu(&table->list, &afi->tables); + list_add_tail_rcu(&table->list, &net->nft.tables); return 0; -err4: +err_trans: kfree(table->name); -err3: +err_strdup: kfree(table); -err2: - module_put(afi->owner); -err1: +err_kzalloc: return err; } @@ -846,30 +787,28 @@ out: static int nft_flush(struct nft_ctx *ctx, int family) { - struct nft_af_info *afi; struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry(afi, &ctx->net->nft.af_info, list) { - if (family != AF_UNSPEC && afi->family != family) + list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + if (family != AF_UNSPEC && table->family != family) continue; - ctx->afi = afi; - list_for_each_entry_safe(table, nt, &afi->tables, list) { - if (!nft_is_active_next(ctx->net, table)) - continue; + ctx->family = table->family; - if (nla[NFTA_TABLE_NAME] && - nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) - continue; + if (!nft_is_active_next(ctx->net, table)) + continue; - ctx->table = table; + if (nla[NFTA_TABLE_NAME] && + nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) + continue; - err = nft_flush_table(ctx); - if (err < 0) - goto out; - } + ctx->table = table; + + err = nft_flush_table(ctx); + if (err < 0) + goto out; } out: return err; @@ -882,20 +821,23 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; struct nft_ctx ctx; - nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla); - if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) + nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla); + if (family == AF_UNSPEC || + (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE])) return nft_flush(&ctx, family); - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); + if (nla[NFTA_TABLE_HANDLE]) + table = nf_tables_table_lookup_byhandle(net, + nla[NFTA_TABLE_HANDLE], + genmask); + else + table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], + family, genmask); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -903,7 +845,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, table->use > 0) return -EBUSY; - ctx.afi = afi; + ctx.family = family; ctx.table = table; return nft_flush_table(&ctx); @@ -915,7 +857,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) kfree(ctx->table->name); kfree(ctx->table); - module_put(ctx->afi->owner); } int nft_register_chain_type(const struct nf_chain_type *ctype) @@ -1116,7 +1057,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->afi->family, ctx->table, + event, 0, ctx->family, ctx->table, ctx->chain); if (err < 0) { kfree_skb(skb); @@ -1134,7 +1075,6 @@ static int nf_tables_dump_chains(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0, s_idx = cb->args[0]; @@ -1144,31 
@@ -1116,7 +1057,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
		goto err;

	err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
-					event, 0, ctx->afi->family, ctx->table,
+					event, 0, ctx->family, ctx->table,
					ctx->chain);
	if (err < 0) {
		kfree_skb(skb);

@@ -1134,7 +1075,6 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_af_info *afi;
	const struct nft_table *table;
	const struct nft_chain *chain;
	unsigned int idx = 0, s_idx = cb->args[0];

@@ -1144,31 +1084,30 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
	rcu_read_lock();
	cb->seq = net->nft.base_seq;

-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (family != NFPROTO_UNSPEC && family != afi->family)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (family != NFPROTO_UNSPEC && family != table->family)
			continue;

-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			list_for_each_entry_rcu(chain, &table->chains, list) {
-				if (idx < s_idx)
-					goto cont;
-				if (idx > s_idx)
-					memset(&cb->args[1], 0,
-					       sizeof(cb->args) - sizeof(cb->args[0]));
-				if (!nft_is_active(net, chain))
-					continue;
-				if (nf_tables_fill_chain_info(skb, net,
-							      NETLINK_CB(cb->skb).portid,
-							      cb->nlh->nlmsg_seq,
-							      NFT_MSG_NEWCHAIN,
-							      NLM_F_MULTI,
-							      afi->family, table, chain) < 0)
-					goto done;
+		list_for_each_entry_rcu(chain, &table->chains, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (!nft_is_active(net, chain))
+				continue;
+			if (nf_tables_fill_chain_info(skb, net,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NFT_MSG_NEWCHAIN,
+						      NLM_F_MULTI,
+						      table->family, table,
+						      chain) < 0)
+				goto done;

-			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
-			idx++;
-		}
+			idx++;
		}
	}
done:

@@ -1184,7 +1123,6 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u8 genmask = nft_genmask_cur(net);
-	const struct nft_af_info *afi;
	const struct nft_table *table;
	const struct nft_chain *chain;
	struct sk_buff *skb2;

@@ -1198,11 +1136,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
		return netlink_dump_start(nlsk, skb, nlh, &c);
	}

-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+				       genmask);
	if (IS_ERR(table))
		return PTR_ERR(table);

@@ -1310,8 +1245,8 @@ struct nft_chain_hook {
static int nft_chain_parse_hook(struct net *net,
				const struct nlattr * const nla[],
-				struct nft_af_info *afi,
-				struct nft_chain_hook *hook, bool create)
+				struct nft_chain_hook *hook, u8 family,
+				bool create)
{
	struct nlattr *ha[NFTA_HOOK_MAX + 1];
	const struct nf_chain_type *type;

@@ -1328,15 +1263,12 @@ static int nft_chain_parse_hook(struct net *net,
		return -EINVAL;

	hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
-	if (hook->num >= afi->nhooks)
-		return -EINVAL;
-
	hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));

-	type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+	type = chain_type[family][NFT_CHAIN_T_DEFAULT];
	if (nla[NFTA_CHAIN_TYPE]) {
-		type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
-						   create);
+		type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+						   family, create);
		if (IS_ERR(type))
			return PTR_ERR(type);
	}
@@ -1353,7 +1285,7 @@ static int nft_chain_parse_hook(struct net *net,
	hook->type = type;

	hook->dev = NULL;
-	if (afi->flags & NFT_AF_NEEDS_DEV) {
+	if (family == NFPROTO_NETDEV) {
		char ifname[IFNAMSIZ];

		if (!ha[NFTA_HOOK_DEV]) {
@@ -1388,7 +1320,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
{
	const struct nlattr * const *nla = ctx->nla;
	struct nft_table *table = ctx->table;
-	struct nft_af_info *afi = ctx->afi;
	struct nft_base_chain *basechain;
	struct nft_stats __percpu *stats;
	struct net *net = ctx->net;
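nft_chain_parse_hook() now takes the family as a plain u8: the hook-number bound check against afi->nhooks is dropped, the default chain type comes from chain_type[family][NFT_CHAIN_T_DEFAULT], and the old NFT_AF_NEEDS_DEV flag becomes an explicit family == NFPROTO_NETDEV test. When a requested chain type is not registered, nf_tables_chain_type_lookup() can autoload it by modalias; the autoload path looks roughly like this (a sketch consistent with the MODULE_ALIAS_NFT_CHAIN() changes near the end of this patch, not the verbatim function):

#ifdef CONFIG_MODULES
	/* inside nf_tables_chain_type_lookup(): drop the nfnl mutex,
	 * request a "nft-chain-<family>-<name>" module, then retry */
	if (autoload) {
		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
		request_module("nft-chain-%u-%.*s", family,
			       nla_len(nla), (const char *)nla_data(nla));
		nfnl_lock(NFNL_SUBSYS_NFTABLES);
		type = __nf_tables_chain_type_lookup(nla, family);
		if (type != NULL)
			return ERR_PTR(-EAGAIN);
	}
#endif

This is also why nf_tables_inet.c and nf_tables_netdev.c below move from MODULE_ALIAS_NFT_FAMILY() to MODULE_ALIAS_NFT_CHAIN(): with the afinfo objects gone there is no per-family module left to autoload, only chain types.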
@@ -1402,7 +1333,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
		struct nft_chain_hook hook;
		struct nf_hook_ops *ops;

-		err = nft_chain_parse_hook(net, nla, afi, &hook, create);
+		err = nft_chain_parse_hook(net, nla, &hook, family, create);
		if (err < 0)
			return err;

@@ -1495,7 +1426,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
		if (!nft_is_base_chain(chain))
			return -EBUSY;

-		err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+		err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
					   create);
		if (err < 0)
			return err;

@@ -1574,7 +1505,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
	const struct nlattr * uninitialized_var(name);
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
-	struct nft_af_info *afi;
	struct nft_table *table;
	struct nft_chain *chain;
	u8 policy = NF_ACCEPT;

@@ -1584,11 +1514,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,

	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;

-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+				       genmask);
	if (IS_ERR(table))
		return PTR_ERR(table);

@@ -1628,7 +1555,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
		}
	}

-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);

	if (chain != NULL) {
		if (nlh->nlmsg_flags & NLM_F_EXCL)

@@ -1649,24 +1576,26 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u8 genmask = nft_genmask_next(net);
-	struct nft_af_info *afi;
	struct nft_table *table;
	struct nft_chain *chain;
	struct nft_rule *rule;
	int family = nfmsg->nfgen_family;
	struct nft_ctx ctx;
+	u64 handle;
	u32 use;
	int err;

-	afi = nf_tables_afinfo_lookup(net, family, false);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+				       genmask);
	if (IS_ERR(table))
		return PTR_ERR(table);

-	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+	} else {
+		chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	}
	if (IS_ERR(chain))
		return PTR_ERR(chain);

@@ -1674,7 +1603,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
	    chain->use > 0)
		return -EBUSY;

-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);

	use = chain->use;
	list_for_each_entry(rule, &chain->rules, list) {

@@ -1839,7 +1768,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
	if (err < 0)
		return err;

-	type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
+	type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
	if (IS_ERR(type))
		return PTR_ERR(type);

@@ -2062,7 +1991,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
		goto err;

	err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
-				       event, 0, ctx->afi->family, ctx->table,
+				       event, 0, ctx->family, ctx->table,
				       ctx->chain, rule);
	if (err < 0) {
		kfree_skb(skb);

@@ -2086,7 +2015,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
{ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); const struct nft_rule_dump_ctx *ctx = cb->data; - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; const struct nft_rule *rule; @@ -2097,39 +2025,37 @@ static int nf_tables_dump_rules(struct sk_buff *skb, rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - if (ctx && ctx->table && - strcmp(ctx->table, table->name) != 0) + if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) + continue; + + list_for_each_entry_rcu(chain, &table->chains, list) { + if (ctx && ctx->chain && + strcmp(ctx->chain, chain->name) != 0) continue; - list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain && - strcmp(ctx->chain, chain->name) != 0) - continue; - - list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_is_active(net, rule)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWRULE, - NLM_F_MULTI | NLM_F_APPEND, - afi->family, table, chain, rule) < 0) - goto done; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + list_for_each_entry_rcu(rule, &chain->rules, list) { + if (!nft_is_active(net, rule)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + table->family, + table, chain, rule) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } } } @@ -2159,7 +2085,6 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; const struct nft_rule *rule; @@ -2203,11 +2128,8 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2264,7 +2186,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; + int family = nfmsg->nfgen_family; struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; @@ -2280,11 +2202,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2323,7 +2242,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return PTR_ERR(old_rule); } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); n = 0; size = 0; @@ -2447,18 +2366,14 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); - struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain = NULL; struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2469,7 +2384,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, return PTR_ERR(chain); } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); if (chain) { if (nla[NFTA_RULE_HANDLE]) { @@ -2636,6 +2551,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_HANDLE] = { .type = NLA_U64 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2649,26 +2565,17 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct nft_af_info *afi = NULL; + int family = nfmsg->nfgen_family; struct nft_table *table = NULL; - if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - } - if (nla[NFTA_SET_TABLE] != NULL) { - if (afi == NULL) - return -EAFNOSUPPORT; - - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], - genmask); + table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], + family, genmask); if (IS_ERR(table)) return PTR_ERR(table); } - nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); return 0; } @@ -2688,6 +2595,22 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-ENOENT); } +static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) +{ + struct nft_set *set; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(set, &table->sets, list) { + if (be64_to_cpu(nla_get_be64(nla)) == set->handle && + nft_active_genmask(set, genmask)) + return set; + } + return ERR_PTR(-ENOENT); +} + static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla, u8 genmask) @@ -2795,7 +2718,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->afi->family; + nfmsg->nfgen_family = ctx->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 
htons(ctx->net->nft.base_seq & 0xffff); @@ -2803,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) goto nla_put_failure; + if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle), + NFTA_SET_PAD)) + goto nla_put_failure; if (set->flags != 0) if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) goto nla_put_failure; @@ -2887,10 +2813,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nft_set *set; unsigned int idx, s_idx = cb->args[0]; - struct nft_af_info *afi; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); - int cur_family = cb->args[3]; struct nft_ctx *ctx = cb->data, ctx_set; if (cb->args[1]) @@ -2899,51 +2823,44 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (ctx->afi && ctx->afi != afi) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (ctx->family != NFPROTO_UNSPEC && + ctx->family != table->family) continue; - if (cur_family) { - if (afi->family != cur_family) + if (ctx->table && ctx->table != table) + continue; + + if (cur_table) { + if (cur_table != table) continue; - cur_family = 0; + cur_table = NULL; } - list_for_each_entry_rcu(table, &afi->tables, list) { - if (ctx->table && ctx->table != table) - continue; + idx = 0; + list_for_each_entry_rcu(set, &table->sets, list) { + if (idx < s_idx) + goto cont; + if (!nft_is_active(net, set)) + goto cont; - if (cur_table) { - if (cur_table != table) - continue; + ctx_set = *ctx; + ctx_set.table = table; + ctx_set.family = table->family; - cur_table = NULL; + if (nf_tables_fill_set(skb, &ctx_set, set, + NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + goto done; } - idx = 0; - list_for_each_entry_rcu(set, &table->sets, list) { - if (idx < s_idx) - goto cont; - if (!nft_is_active(net, set)) - goto cont; - - ctx_set = *ctx; - ctx_set.table = table; - ctx_set.afi = afi; - if (nf_tables_fill_set(skb, &ctx_set, set, - NFT_MSG_NEWSET, - NLM_F_MULTI) < 0) { - cb->args[0] = idx; - cb->args[2] = (unsigned long) table; - cb->args[3] = afi->family; - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } - if (s_idx) - s_idx = 0; + idx++; } + if (s_idx) + s_idx = 0; } cb->args[1] = 1; done: @@ -3041,8 +2958,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; const struct nft_set_ops *ops; - struct nft_af_info *afi; struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; @@ -3149,15 +3066,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { @@ -3223,6 +3137,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->udata = udata; set->timeout = timeout; set->gc_int = gc_int; + set->handle = nf_tables_alloc_handle(table); err = ops->init(set, &desc, nla); if (err < 0) @@ -3280,7 +3195,10 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); + if (nla[NFTA_SET_HANDLE]) + set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask); + else + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); @@ -3415,19 +3333,15 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct nft_af_info *afi; + int family = nfmsg->nfgen_family; struct nft_table *table; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], - genmask); + table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], + family, genmask); if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); return 0; } @@ -3532,7 +3446,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); - struct nft_af_info *afi; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; @@ -3544,21 +3457,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) int event; rcu_read_lock(); - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if (afi != dump_ctx->ctx.afi) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (dump_ctx->ctx.family != NFPROTO_UNSPEC && + dump_ctx->ctx.family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - if (table != dump_ctx->ctx.table) - continue; + if (table != dump_ctx->ctx.table) + continue; - list_for_each_entry_rcu(set, &table->sets, list) { - if (set == dump_ctx->set) { - set_found = true; - break; - } + list_for_each_entry_rcu(set, &table->sets, list) { + if (set == dump_ctx->set) { + set_found = true; + break; } - break; } break; } @@ -3578,7 +3489,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = afi->family; + nfmsg->nfgen_family = table->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(net->nft.base_seq & 0xffff); @@ -3641,7 +3552,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->afi->family; + nfmsg->nfgen_family = ctx->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); @@ 
-3998,7 +3909,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { .net = ctx->net, - .afi = ctx->afi, + .family = ctx->family, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, }; @@ -4417,6 +4328,21 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, } EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); +struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) +{ + struct nft_object *obj; + + list_for_each_entry(obj, &table->objects, list) { + if (be64_to_cpu(nla_get_be64(nla)) == obj->handle && + objtype == obj->ops->type->type && + nft_active_genmask(obj, genmask)) + return obj; + } + return ERR_PTR(-ENOENT); +} + static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_TABLE] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, @@ -4424,6 +4350,7 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, + [NFTA_OBJ_HANDLE] = { .type = NLA_U64}, }; static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, @@ -4529,7 +4456,6 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, const struct nft_object_type *type; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_af_info *afi; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; @@ -4541,11 +4467,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, !nla[NFTA_OBJ_DATA]) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -4563,7 +4486,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, return 0; } - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); type = nft_obj_type_get(objtype); if (IS_ERR(type)) @@ -4575,6 +4498,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err1; } obj->table = table; + obj->handle = nf_tables_alloc_handle(table); + obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); if (!obj->name) { err = -ENOMEM; @@ -4621,7 +4546,9 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || - nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) + nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || + nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), + NFTA_OBJ_PAD)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -4640,7 +4567,6 @@ struct nft_obj_filter { static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct nft_obj_filter *filter = cb->data; @@ -4655,38 +4581,37 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->nft.base_seq; - list_for_each_entry_rcu(afi, &net->nft.af_info, list) { - if 
(family != NFPROTO_UNSPEC && family != afi->family) + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (family != NFPROTO_UNSPEC && family != table->family) continue; - list_for_each_entry_rcu(table, &afi->tables, list) { - list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nft_is_active(net, obj)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table[0] && - strcmp(filter->table, table->name)) - goto cont; - if (filter && - filter->type != NFT_OBJECT_UNSPEC && - obj->ops->type->type != filter->type) - goto cont; + list_for_each_entry_rcu(obj, &table->objects, list) { + if (!nft_is_active(net, obj)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter && filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; + if (filter && + filter->type != NFT_OBJECT_UNSPEC && + obj->ops->type->type != filter->type) + goto cont; - if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWOBJ, - NLM_F_MULTI | NLM_F_APPEND, - afi->family, table, obj, reset) < 0) - goto done; + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, + table->family, table, + obj, reset) < 0) + goto done; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } } done: @@ -4738,7 +4663,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); int family = nfmsg->nfgen_family; - const struct nft_af_info *afi; const struct nft_table *table; struct nft_object *obj; struct sk_buff *skb2; @@ -4769,11 +4693,8 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, !nla[NFTA_OBJ_TYPE]) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -4819,32 +4740,33 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_af_info *afi; struct nft_table *table; struct nft_object *obj; struct nft_ctx ctx; u32 objtype; if (!nla[NFTA_OBJ_TYPE] || - !nla[NFTA_OBJ_NAME]) + (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE])) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); if (IS_ERR(table)) return PTR_ERR(table); objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (nla[NFTA_OBJ_HANDLE]) + obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE], + objtype, genmask); + else + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], + objtype, genmask); if (IS_ERR(obj)) return PTR_ERR(obj); if (obj->use > 0) return -EBUSY; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); return 
nft_delobj(&ctx, obj);
}

@@ -4882,7 +4804,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
			       struct nft_object *obj, int event)
{
	nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
-		       ctx->afi->family, ctx->report, GFP_KERNEL);
+		       ctx->family, ctx->report, GFP_KERNEL);
}

/*
@@ -4910,6 +4832,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
	[NFTA_FLOWTABLE_NAME]		= { .type = NLA_STRING,
					    .len = NFT_NAME_MAXLEN - 1 },
	[NFTA_FLOWTABLE_HOOK]		= { .type = NLA_NESTED },
+	[NFTA_FLOWTABLE_HANDLE]		= { .type = NLA_U64 },
};

struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
@@ -4927,6 +4850,20 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);

+struct nft_flowtable *
+nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
+				    const struct nlattr *nla, u8 genmask)
+{
+	struct nft_flowtable *flowtable;
+
+	list_for_each_entry(flowtable, &table->flowtables, list) {
+		if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
+		    nft_active_genmask(flowtable, genmask))
+			return flowtable;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
#define NFT_FLOWTABLE_DEVICE_MAX	8

static int nf_tables_parse_devices(const struct nft_ctx *ctx,
@@ -4993,7 +4930,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
		return -EINVAL;

	hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
-	if (hooknum >= ctx->afi->nhooks)
+	if (hooknum != NF_NETDEV_INGRESS)
		return -EINVAL;

	priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
@@ -5009,6 +4946,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
		goto err1;
	}

+	flowtable->hooknum	= hooknum;
+	flowtable->priority	= priority;
	flowtable->ops		= ops;
	flowtable->ops_len	= n;

@@ -5029,33 +4968,31 @@ err1:
	return err;
}

-static const struct nf_flowtable_type *
-__nft_flowtable_type_get(const struct nft_af_info *afi)
+static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
	const struct nf_flowtable_type *type;

	list_for_each_entry(type, &nf_tables_flowtables, list) {
-		if (afi->family == type->family)
+		if (family == type->family)
			return type;
	}
	return NULL;
}

-static const struct nf_flowtable_type *
-nft_flowtable_type_get(const struct nft_af_info *afi)
+static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
{
	const struct nf_flowtable_type *type;

-	type = __nft_flowtable_type_get(afi);
+	type = __nft_flowtable_type_get(family);
	if (type != NULL && try_module_get(type->owner))
		return type;

#ifdef CONFIG_MODULES
	if (type == NULL) {
		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nf-flowtable-%u", afi->family);
+		request_module("nf-flowtable-%u", family);
		nfnl_lock(NFNL_SUBSYS_NFTABLES);
-		if (__nft_flowtable_type_get(afi))
+		if (__nft_flowtable_type_get(family))
			return ERR_PTR(-EAGAIN);
	}
#endif
@@ -5067,15 +5004,12 @@ void nft_flow_table_iterate(struct net *net,
			    void *data)
{
	struct nft_flowtable *flowtable;
-	const struct nft_af_info *afi;
	const struct nft_table *table;

	rcu_read_lock();
-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		list_for_each_entry_rcu(table, &afi->tables, list) {
-			list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
-				iter(&flowtable->data, data);
-			}
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+			iter(&flowtable->data, data);
		}
	}
	rcu_read_unlock();
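nft_flowtable_type_get() above now keys both the type-list search and the modprobe alias on the family alone. A per-family flowtable backend therefore registers a struct nf_flowtable_type for its family and advertises the matching "nf-flowtable-<family>" alias. A sketch of such a module (illustrative only; the field set of nf_flowtable_type and the backend hook belong to the companion flow-table patches and may differ):

/* hypothetical IPv4 flowtable backend module */
static struct nf_flowtable_type flowtable_ipv4 = {
	.family	= NFPROTO_IPV4,
	.owner	= THIS_MODULE,
};

static int __init nf_flow_ipv4_module_init(void)
{
	nft_register_flowtable_type(&flowtable_ipv4);
	return 0;
}

static void __exit nf_flow_ipv4_module_exit(void)
{
	nft_unregister_flowtable_type(&flowtable_ipv4);
}

module_init(nf_flow_ipv4_module_init);
module_exit(nf_flow_ipv4_module_exit);
MODULE_ALIAS("nf-flowtable-2");	/* NFPROTO_IPV4; matches the request_module() above */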
@@ -5106,7 +5040,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
	struct nft_flowtable *flowtable;
-	struct nft_af_info *afi;
	struct nft_table *table;
	struct nft_ctx ctx;
	int err, i, k;

@@ -5116,11 +5049,8 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
	    !nla[NFTA_FLOWTABLE_HOOK])
		return -EINVAL;

-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+				       family, genmask);
	if (IS_ERR(table))
		return PTR_ERR(table);

@@ -5137,20 +5067,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
		return 0;
	}

-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);

	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
	if (!flowtable)
		return -ENOMEM;

	flowtable->table = table;
+	flowtable->handle = nf_tables_alloc_handle(table);
+
	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
	if (!flowtable->name) {
		err = -ENOMEM;
		goto err1;
	}

-	type = nft_flowtable_type_get(afi);
+	type = nft_flowtable_type_get(family);
	if (IS_ERR(type)) {
		err = PTR_ERR(type);
		goto err2;

@@ -5210,26 +5142,28 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
	struct nft_flowtable *flowtable;
-	struct nft_af_info *afi;
	struct nft_table *table;
	struct nft_ctx ctx;

-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+	table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+				       family, genmask);
	if (IS_ERR(table))
		return PTR_ERR(table);

-	flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
-					       genmask);
+	if (nla[NFTA_FLOWTABLE_HANDLE])
+		flowtable = nf_tables_flowtable_lookup_byhandle(table,
+								nla[NFTA_FLOWTABLE_HANDLE],
+								genmask);
+	else
+		flowtable = nf_tables_flowtable_lookup(table,
+						       nla[NFTA_FLOWTABLE_NAME],
+						       genmask);
	if (IS_ERR(flowtable))
		return PTR_ERR(flowtable);
	if (flowtable->use > 0)
		return -EBUSY;

-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);

	return nft_delflowtable(&ctx, flowtable);
}

@@ -5256,7 +5190,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,

	if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
	    nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
-	    nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
+	    nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+	    nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+			 NFTA_FLOWTABLE_PAD))
		goto nla_put_failure;

	nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
@@ -5298,40 +5234,37 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
	struct net *net = sock_net(skb->sk);
	int family = nfmsg->nfgen_family;
	struct nft_flowtable *flowtable;
-	const struct nft_af_info *afi;
	const struct nft_table *table;

	rcu_read_lock();
	cb->seq = net->nft.base_seq;

-	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
-		if (family != NFPROTO_UNSPEC && family != afi->family)
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (family != NFPROTO_UNSPEC && family != table->family)
			continue;

-
list_for_each_entry_rcu(table, &afi->tables, list) { - list_for_each_entry_rcu(flowtable, &table->flowtables, list) { - if (!nft_is_active(net, flowtable)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table[0] && - strcmp(filter->table, table->name)) - goto cont; + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + if (!nft_is_active(net, flowtable)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter && filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; - if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWFLOWTABLE, - NLM_F_MULTI | NLM_F_APPEND, - afi->family, flowtable) < 0) - goto done; + if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWFLOWTABLE, + NLM_F_MULTI | NLM_F_APPEND, + table->family, flowtable) < 0) + goto done; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: - idx++; - } + idx++; } } done: @@ -5384,7 +5317,6 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, u8 genmask = nft_genmask_cur(net); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; - const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; int err; @@ -5410,17 +5342,14 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, if (!nla[NFTA_FLOWTABLE_NAME]) return -EINVAL; - afi = nf_tables_afinfo_lookup(net, family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], + family, genmask); if (IS_ERR(table)) return PTR_ERR(table); flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], genmask); - if (IS_ERR(table)) + if (IS_ERR(flowtable)) return PTR_ERR(flowtable); skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); @@ -5457,7 +5386,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, ctx->seq, event, 0, - ctx->afi->family, flowtable); + ctx->family, flowtable); if (err < 0) { kfree_skb(skb); goto err; @@ -5535,17 +5464,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_flowtable *flowtable; struct nft_table *table; - struct nft_af_info *afi; if (event != NETDEV_UNREGISTER) return 0; nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(flowtable, &table->flowtables, list) { - nft_flowtable_event(event, dev, flowtable); - } + list_for_each_entry(table, &dev_net(dev)->nft.tables, list) { + list_for_each_entry(flowtable, &table->flowtables, list) { + nft_flowtable_event(event, dev, flowtable); } } nfnl_unlock(NFNL_SUBSYS_NFTABLES); @@ -5798,7 +5724,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_table_update(trans)) { if (!nft_trans_table_enable(trans)) { nf_tables_table_disable(net, - trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -5960,7 +5885,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) if 
(nft_trans_table_update(trans)) {
			if (nft_trans_table_enable(trans)) {
				nf_tables_table_disable(net,
-							trans->ctx.afi,
							trans->ctx.table);
				trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
			}

@@ -6563,20 +6487,6 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
}
EXPORT_SYMBOL_GPL(nft_data_dump);

-static int __net_init nf_tables_init_net(struct net *net)
-{
-	INIT_LIST_HEAD(&net->nft.af_info);
-	INIT_LIST_HEAD(&net->nft.commit_list);
-	net->nft.base_seq = 1;
-	return 0;
-}
-
-static void __net_exit nf_tables_exit_net(struct net *net)
-{
-	WARN_ON_ONCE(!list_empty(&net->nft.af_info));
-	WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
-}
-
int __nft_release_basechain(struct nft_ctx *ctx)
{
	struct nft_rule *rule, *nr;

@@ -6597,8 +6507,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);

-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+static void __nft_release_tables(struct net *net)
{
	struct nft_flowtable *flowtable, *nf;
	struct nft_table *table, *nt;

@@ -6608,10 +6517,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
	struct nft_set *set, *ns;
	struct nft_ctx ctx = {
		.net	= net,
-		.afi	= afi,
	};

-	list_for_each_entry_safe(table, nt, &afi->tables, list) {
+	list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+		ctx.family = table->family;
+
		list_for_each_entry(chain, &table->chains, list)
			nf_tables_unregister_hook(net, table, chain);
		list_for_each_entry(flowtable, &table->flowtables, list)

@@ -6652,6 +6562,21 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
	}
}

+static int __net_init nf_tables_init_net(struct net *net)
+{
+	INIT_LIST_HEAD(&net->nft.tables);
+	INIT_LIST_HEAD(&net->nft.commit_list);
+	net->nft.base_seq = 1;
+	return 0;
+}
+
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+	__nft_release_tables(net);
+	WARN_ON_ONCE(!list_empty(&net->nft.tables));
+	WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
static struct pernet_operations nf_tables_net_ops = {
	.init	= nf_tables_init_net,
	.exit	= nf_tables_exit_net,

@@ -6678,7 +6603,6 @@ static int __init nf_tables_module_init(void)

	register_netdevice_notifier(&nf_tables_flowtable_notifier);

-	pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <[email protected]>\n");
	return register_pernet_subsys(&nf_tables_net_ops);
err3:
	nf_tables_core_module_exit();
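With table teardown folded into nf_tables_exit_net() via __nft_release_tables() above, the nf_tables core now owns all per-netns state, and the two pseudo-family modules that follow shrink to bare chain-type registrations. After this series the netns-level state reduces to roughly the following (a reconstruction of struct netns_nftables, not quoted from the patch; the af_info list and the per-family shortcut pointers such as net->nft.inet are gone):

struct netns_nftables {
	struct list_head	tables;		/* every table, any family */
	struct list_head	commit_list;
	unsigned int		base_seq;
	u8			gencursor;
};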
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 58b9be7480bb..e30c7da09d0d 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -38,40 +38,6 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
	return nft_do_chain(&pkt, priv);
}

-static struct nft_af_info nft_af_inet __read_mostly = {
-	.family		= NFPROTO_INET,
-	.nhooks		= NF_INET_NUMHOOKS,
-	.owner		= THIS_MODULE,
-};
-
-static int __net_init nf_tables_inet_init_net(struct net *net)
-{
-	net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
-	if (net->nft.inet == NULL)
-		return -ENOMEM;
-	memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
-
-	if (nft_register_afinfo(net, net->nft.inet) < 0)
-		goto err;
-
-	return 0;
-
-err:
-	kfree(net->nft.inet);
-	return -ENOMEM;
-}
-
-static void __net_exit nf_tables_inet_exit_net(struct net *net)
-{
-	nft_unregister_afinfo(net, net->nft.inet);
-	kfree(net->nft.inet);
-}
-
-static struct pernet_operations nf_tables_inet_net_ops = {
-	.init	= nf_tables_inet_init_net,
-	.exit	= nf_tables_inet_exit_net,
-};
-
static const struct nf_chain_type filter_inet = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,

@@ -93,22 +59,11 @@ static const struct nf_chain_type filter_inet = {

static int __init nf_tables_inet_init(void)
{
-	int ret;
-
-	ret = nft_register_chain_type(&filter_inet);
-	if (ret < 0)
-		return ret;
-
-	ret = register_pernet_subsys(&nf_tables_inet_net_ops);
-	if (ret < 0)
-		nft_unregister_chain_type(&filter_inet);
-
-	return ret;
+	return nft_register_chain_type(&filter_inet);
}

static void __exit nf_tables_inet_exit(void)
{
-	unregister_pernet_subsys(&nf_tables_inet_net_ops);
	nft_unregister_chain_type(&filter_inet);
}

@@ -117,4 +72,4 @@ module_exit(nf_tables_inet_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <[email protected]>");

-MODULE_ALIAS_NFT_FAMILY(1);
+MODULE_ALIAS_NFT_CHAIN(1, "filter");
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 42f6f6d42a6d..4041fafca934 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -38,41 +38,6 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
	return nft_do_chain(&pkt, priv);
}

-static struct nft_af_info nft_af_netdev __read_mostly = {
-	.family		= NFPROTO_NETDEV,
-	.nhooks		= NF_NETDEV_NUMHOOKS,
-	.owner		= THIS_MODULE,
-	.flags		= NFT_AF_NEEDS_DEV,
-};
-
-static int nf_tables_netdev_init_net(struct net *net)
-{
-	net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
-	if (net->nft.netdev == NULL)
-		return -ENOMEM;
-
-	memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
-
-	if (nft_register_afinfo(net, net->nft.netdev) < 0)
-		goto err;
-
-	return 0;
-err:
-	kfree(net->nft.netdev);
-	return -ENOMEM;
-}
-
-static void nf_tables_netdev_exit_net(struct net *net)
-{
-	nft_unregister_afinfo(net, net->nft.netdev);
-	kfree(net->nft.netdev);
-}
-
-static struct pernet_operations nf_tables_netdev_net_ops = {
-	.init	= nf_tables_netdev_init_net,
-	.exit	= nf_tables_netdev_exit_net,
-};
-
static const struct nf_chain_type nft_filter_chain_netdev = {
	.name		= "filter",
	.type		= NFT_CHAIN_T_DEFAULT,

@@ -109,7 +74,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct nft_af_info *afi;
	struct nft_table *table;
	struct nft_chain *chain, *nr;
	struct nft_ctx ctx = {

@@ -121,20 +85,18 @@ static int nf_tables_netdev_event(struct notifier_block *this,
		return NOTIFY_DONE;

	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
-		ctx.afi = afi;
-		if (afi->family != NFPROTO_NETDEV)
+	list_for_each_entry(table, &ctx.net->nft.tables, list) {
+		if (table->family != NFPROTO_NETDEV)
			continue;

-		list_for_each_entry(table, &afi->tables, list) {
-			ctx.table = table;
-			list_for_each_entry_safe(chain, nr, &table->chains, list) {
-				if (!nft_is_base_chain(chain))
-					continue;
+		ctx.family = table->family;
+		ctx.table = table;
+		list_for_each_entry_safe(chain, nr, &table->chains, list) {
+			if (!nft_is_base_chain(chain))
+				continue;

-				ctx.chain = chain;
-				nft_netdev_event(event, dev, &ctx);
-			}
+			ctx.chain = chain;
+			nft_netdev_event(event, dev, &ctx);
		}
	}
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);

@@ -154,27 +116,21 @@ static int __init nf_tables_netdev_init(void)
	if (ret)
		return ret;

-	ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-	if (ret)
-		goto err1;
-
	ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
	if (ret)
-		goto err2;
+
goto err_register_netdevice_notifier; return 0; -err2: - unregister_pernet_subsys(&nf_tables_netdev_net_ops); -err1: +err_register_netdevice_notifier: nft_unregister_chain_type(&nft_filter_chain_netdev); + return ret; } static void __exit nf_tables_netdev_exit(void) { unregister_netdevice_notifier(&nf_tables_netdev_notifier); - unregister_pernet_subsys(&nf_tables_netdev_net_ops); nft_unregister_chain_type(&nft_filter_chain_netdev); } @@ -183,4 +139,4 @@ module_exit(nf_tables_netdev_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <[email protected]>"); -MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */ +MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 733d3e4a30d8..03ead8a9e90c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -37,8 +37,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); rcu_dereference_protected(table[(id)].subsys, \ lockdep_nfnl_is_held((id))) -static char __initdata nfversion[] = "0.30"; - static struct { struct mutex mutex; const struct nfnetlink_subsystem __rcu *subsys; @@ -580,13 +578,11 @@ static int __init nfnetlink_init(void) for (i=0; i<NFNL_SUBSYS_COUNT; i++) mutex_init(&table[i].mutex); - pr_info("Netfilter messages via NETLINK v%s.\n", nfversion); return register_pernet_subsys(&nfnetlink_net_ops); } static void __exit nfnetlink_exit(void) { - pr_info("Removing netfilter NETLINK layer.\n"); unregister_pernet_subsys(&nfnetlink_net_ops); } module_init(nfnetlink_init); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c45e6d4358ab..88d427f9f9e6 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -527,7 +527,6 @@ static int __init nfnl_acct_init(void) goto err_out; } - pr_info("nfnl_acct: registering with nfnetlink.\n"); ret = nfnetlink_subsys_register(&nfnl_acct_subsys); if (ret < 0) { pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); @@ -543,7 +542,6 @@ err_out: static void __exit nfnl_acct_exit(void) { - pr_info("nfnl_acct: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&nfnl_acct_subsys); unregister_pernet_subsys(&nfnl_acct_ops); } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 32b1c0b44e79..95b04702a655 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -615,8 +615,6 @@ err_out: static void __exit cttimeout_exit(void) { - pr_info("cttimeout: unregistering from nfnetlink.\n"); - nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e955bec0acc6..7b46aa4c478d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1054,7 +1054,6 @@ static int nful_open(struct inode *inode, struct file *file) } static const struct file_operations nful_file_ops = { - .owner = THIS_MODULE, .open = nful_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 2db35f2d553d..8bba23160a68 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1477,7 +1477,6 @@ static int nfqnl_open(struct inode *inode, struct file *file) } static const struct file_operations nfqnl_file_ops = { - .owner = THIS_MODULE, .open = nfqnl_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 
dcff0dc8d28b..8e23726b9081 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, { par->net = ctx->net; par->table = ctx->table->name; - switch (ctx->afi->family) { + switch (ctx->family) { case AF_INET: entry->e4.ip.proto = proto; entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; @@ -175,7 +175,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, } else { par->hook_mask = 0; } - par->family = ctx->afi->family; + par->family = ctx->family; par->nft_compat = true; } @@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.net = ctx->net; par.target = target; par.targinfo = info; - par.family = ctx->afi->family; + par.family = ctx->family; if (par.target->destroy != NULL) par.target->destroy(&par); @@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, { par->net = ctx->net; par->table = ctx->table->name; - switch (ctx->afi->family) { + switch (ctx->family) { case AF_INET: entry->e4.ip.proto = proto; entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; @@ -389,7 +389,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, } else { par->hook_mask = 0; } - par->family = ctx->afi->family; + par->family = ctx->family; par->nft_compat = true; } @@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.net = ctx->net; par.match = match; par.matchinfo = info; - par.family = ctx->afi->family; + par.family = ctx->family; if (par.match->destroy != NULL) par.match->destroy(&par); @@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, mt_name = nla_data(tb[NFTA_MATCH_NAME]); rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); - family = ctx->afi->family; + family = ctx->family; /* Re-use the existing match if it's already loaded. */ list_for_each_entry(nft_match, &nft_match_list, head) { @@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, tg_name = nla_data(tb[NFTA_TARGET_NAME]); rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); - family = ctx->afi->family; + family = ctx->family; /* Re-use the existing target if it's already loaded. 
*/ list_for_each_entry(nft_target, &nft_target_list, head) { @@ -812,8 +812,6 @@ static int __init nft_compat_module_init(void) goto err_target; } - pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <[email protected]>\n"); - return ret; err_target: diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2647b895f4b0..6ab274b14484 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_DIRECTION] == NULL) return -EINVAL; - switch (ctx->afi->family) { + switch (ctx->family) { case NFPROTO_IPV4: len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip); @@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nf_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) return err; @@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nf_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) goto err1; @@ -564,7 +564,7 @@ err1: static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { - nf_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->family); } static void nft_ct_set_destroy(const struct nft_ctx *ctx, @@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv = nft_expr_priv(expr); __nft_ct_set_destroy(ctx, priv); - nf_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, struct nft_ct_helper_obj *priv = nft_obj_data(obj); struct nf_conntrack_helper *help4, *help6; char name[NF_CT_HELPER_NAME_LEN]; - int family = ctx->afi->family; + int family = ctx->family; if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) return -EINVAL; @@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, switch (family) { case NFPROTO_IPV4: - if (ctx->afi->family == NFPROTO_IPV6) + if (ctx->family == NFPROTO_IPV6) return -EINVAL; help4 = nf_conntrack_helper_try_module_get(name, family, priv->l4proto); break; case NFPROTO_IPV6: - if (ctx->afi->family == NFPROTO_IPV4) + if (ctx->family == NFPROTO_IPV4) return -EINVAL; help6 = nf_conntrack_helper_try_module_get(name, family, diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index ec0fd78231d8..fc83e29d6634 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -164,7 +164,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); - err = nft_validate_register_load(priv->sreg_key, set->klen);; + err = nft_validate_register_load(priv->sreg_key, set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index dd38785dfed9..4503b8dcf9c0 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -151,7 +151,7 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, priv->flowtable = flowtable; flowtable->use++; - return nf_ct_netns_get(ctx->net, ctx->afi->family); + return nf_ct_netns_get(ctx->net, ctx->family); } static void nft_flow_offload_destroy(const struct nft_ctx *ctx, @@ -160,7 +160,7 @@ static void nft_flow_offload_destroy(const struct nft_ctx *ctx, 
struct nft_flow_offload *priv = nft_expr_priv(expr); priv->flowtable->use--; - nf_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->family); } static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 6f6e64423643..a27be36dc0af 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -112,7 +112,7 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - err = nf_logger_find_get(ctx->afi->family, li->type); + err = nf_logger_find_get(ctx->family, li->type); if (err < 0) goto err1; @@ -133,7 +133,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx, if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); - nf_logger_put(ctx->afi->family, li->type); + nf_logger_put(ctx->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 6ac03d4266c9..9d8655bc1bea 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx *ctx, } } - return nf_ct_netns_get(ctx->net, ctx->afi->family); + return nf_ct_netns_get(ctx->net, ctx->family); } EXPORT_SYMBOL_GPL(nft_masq_init); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1a91e676f13e..8fb91940e2e7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -339,7 +339,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx, if (priv->key != NFT_META_SECPATH) return 0; - switch (ctx->afi->family) { + switch (ctx->family) { case NFPROTO_NETDEV: hooks = 1 << NF_NETDEV_INGRESS; break; @@ -370,7 +370,7 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, if (priv->key != NFT_META_PKTTYPE) return 0; - switch (ctx->afi->family) { + switch (ctx->family) { case NFPROTO_BRIDGE: hooks = 1 << NF_BR_PRE_ROUTING; break; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index ed548d06b6dd..1f36954c2ba9 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != ctx->afi->family) + if (family != ctx->family) return -EOPNOTSUPP; switch (family) { diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 1e66538bf0ff..c64cbe78dee7 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx *ctx, return -EINVAL; } - return nf_ct_netns_get(ctx->net, ctx->afi->family); + return nf_ct_netns_get(ctx->net, ctx->family); } EXPORT_SYMBOL_GPL(nft_redir_init); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 10c19a3f4cbd..0b56bf05c169 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1082,10 +1082,10 @@ struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, { struct xt_table *t = xt_find_table_lock(net, af, name); -#ifdef CONFIG_MODULE +#ifdef CONFIG_MODULES if (IS_ERR(t)) { int err = request_module("%stable_%s", xt_prefix[af], name); - if (err) + if (err < 0) return ERR_PTR(err); t = xt_find_table_lock(net, af, name); } @@ -1362,7 +1362,6 @@ static int xt_table_open(struct inode *inode, struct file *file) } static const struct file_operations xt_table_ops = { - .owner = THIS_MODULE, .open = xt_table_open, .read = seq_read, .llseek = seq_lseek, @@ -1498,7 +1497,6 @@ static int 
xt_match_open(struct inode *inode, struct file *file) } static const struct file_operations xt_match_ops = { - .owner = THIS_MODULE, .open = xt_match_open, .read = seq_read, .llseek = seq_lseek, @@ -1551,7 +1549,6 @@ static int xt_target_open(struct inode *inode, struct file *file) } static const struct file_operations xt_target_ops = { - .owner = THIS_MODULE, .open = xt_target_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5da8746f7b88..ca6847403ca2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -353,7 +353,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, static bool select_all(const struct xt_hashlimit_htable *ht, const struct dsthash_ent *he) { - return 1; + return true; } static bool select_gc(const struct xt_hashlimit_htable *ht, @@ -1266,7 +1266,6 @@ static int dl_proc_open(struct inode *inode, struct file *file) } static const struct file_operations dl_file_ops_v2 = { - .owner = THIS_MODULE, .open = dl_proc_open_v2, .read = seq_read, .llseek = seq_lseek, @@ -1274,7 +1273,6 @@ static const struct file_operations dl_file_ops_v2 = { }; static const struct file_operations dl_file_ops_v1 = { - .owner = THIS_MODULE, .open = dl_proc_open_v1, .read = seq_read, .llseek = seq_lseek, @@ -1282,7 +1280,6 @@ static const struct file_operations dl_file_ops_v1 = { }; static const struct file_operations dl_file_ops = { - .owner = THIS_MODULE, .open = dl_proc_open, .read = seq_read, .llseek = seq_lseek, diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 000e70377f85..7ca64a50db04 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -58,7 +58,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par) */ pr_debug("Dropping evil IPComp tinygram.\n"); par->hotdrop = true; - return 0; + return false; } return spi_match(compinfo->spis[0], compinfo->spis[1], diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 54cbf5b9864c..2ad445c1d27c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2424,6 +2424,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, while (skb->len >= nlmsg_total_size(0)) { int msglen; + memset(&extack, 0, sizeof(extack)); nlh = nlmsg_hdr(skb); err = 0; @@ -2438,7 +2439,6 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; - memset(&extack, 0, sizeof(extack)); err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f143908b651d..eb55f1b3d047 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2454,7 +2454,7 @@ static int validate_geneve_opts(struct sw_flow_key *key) option = (struct geneve_opt *)((u8 *)option + len); opts_len -= len; - }; + } key->tun_key.tun_flags |= crit_opt ? 
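The af_netlink change above moves the extack memset to the top of the per-message loop. Previously, messages that branched to the ack path before reaching the callback (length errors, control types below NLMSG_MIN_TYPE) were acked with whatever extack contents the previous message left behind. A small userspace reduction of that reuse bug, with invented names throughout:

    /* Reduction of the extack-reuse fix: a struct recycled across loop
     * iterations must be re-zeroed per message, or early-exit paths ack
     * with stale state. ext_ack and ack() are invented for the sketch.
     */
    #include <stdio.h>
    #include <string.h>

    struct ext_ack { const char *msg; };

    static void ack(int id, const struct ext_ack *ea)
    {
        printf("msg %d acked: %s\n", id, ea->msg ? ea->msg : "(no extack)");
    }

    int main(void)
    {
        struct ext_ack ea;
        for (int id = 0; id < 2; id++) {
            memset(&ea, 0, sizeof(ea));      /* the fix: zero every message */
            if (id == 0)
                ea.msg = "invalid attribute"; /* handler ran and failed */
            /* id == 1 models the "goto ack" paths that skip the handler */
            ack(id, &ea);
        }
        return 0;
    }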
TUNNEL_CRIT_OPT : 0; @@ -2487,7 +2487,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; } - }; + } start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); if (start < 0) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2e554ef6d75f..9920d2f84eff 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock) sizeof(val)); } -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { - return tcp_sk(tc->t_sock->sk)->snd_nxt; + /* seq# of the last byte of data in tcp send buffer */ + return tcp_sk(tc->t_sock->sk)->write_seq; } u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index e7858ee8ed8b..c6fa080e9b6d 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -55,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc); u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); extern struct rds_transport rds_tcp_transport; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 73c74763ca72..16f65744d984 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, * m_ack_seq is set to the sequence number of the last byte of * header and data. see rds_tcp_is_acked(). */ - tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); + tc->t_last_sent_nxt = rds_tcp_write_seq(tc); rm->m_ack_seq = tc->t_last_sent_nxt + sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; @@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", - rm, rds_tcp_snd_nxt(tc), + rm, rds_tcp_write_seq(tc), (unsigned long long)rm->m_ack_seq); } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index af4b8ec60d9a..b7ba9b06b147 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, int bind) { struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tcf_csum_params *params_old, *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tc_csum *parm; struct tcf_csum *p; @@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (!tcf_idr_check(tn, parm->index, a, bind)) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + &act_csum_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } p = to_tcf_csum(*a); - spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; - p->update_flags = parm->update_flags; - spin_unlock_bh(&p->tcf_lock); + ASSERT_RTNL(); + + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_idr_release(*a, bind); + return -ENOMEM; + } + params_old = rtnl_dereference(p->params); + + params_new->action = parm->action; + params_new->update_flags = parm->update_flags; + rcu_assign_pointer(p->params, params_new); + if (params_old) + 
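The rds/tcp hunks swap snd_nxt for write_seq when recording m_ack_seq. In TCP, write_seq sits one past the last byte queued to the socket, while snd_nxt only tracks what has been handed to the wire; when earlier data is still queued the two diverge, and as I read it an snd_nxt-based m_ack_seq lands too low, letting a message be treated as acked before its bytes were even transmitted. A toy computation with invented numbers (48 is only a stand-in for sizeof(struct rds_header)):

    #include <stdio.h>

    int main(void)
    {
        unsigned snd_nxt   = 1000;  /* next byte to hit the wire */
        unsigned write_seq = 1500;  /* 500 bytes still queued, unsent */
        unsigned hdr_len   = 48;    /* stand-in for sizeof(struct rds_header) */
        unsigned data_len  = 100;

        unsigned old_ack = snd_nxt   + hdr_len + data_len - 1; /* pre-patch */
        unsigned new_ack = write_seq + hdr_len + data_len - 1; /* post-patch */

        printf("old m_ack_seq=%u new m_ack_seq=%u\n", old_ack, new_ack);
        return 0;
    }

With data queued, the pre-patch value undercounts by exactly the queued span, here 500 bytes.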
kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); - int action; + struct tcf_csum_params *params; u32 update_flags; + int action; + + rcu_read_lock(); + params = rcu_dereference(p->params); - spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); - bstats_update(&p->tcf_bstats, skb); - action = p->tcf_action; - update_flags = p->update_flags; - spin_unlock(&p->tcf_lock); + bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb); + action = params->action; if (unlikely(action == TC_ACT_SHOT)) - goto drop; + goto drop_stats; + update_flags = params->update_flags; switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) @@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, break; } +unlock: + rcu_read_unlock(); return action; drop: - spin_lock(&p->tcf_lock); - p->tcf_qstats.drops++; - spin_unlock(&p->tcf_lock); - return TC_ACT_SHOT; + action = TC_ACT_SHOT; + +drop_stats: + qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats)); + goto unlock; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, @@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); + struct tcf_csum_params *params; struct tc_csum opt = { - .update_flags = p->update_flags, .index = p->tcf_index, - .action = p->tcf_action, .refcnt = p->tcf_refcnt - ref, .bindcnt = p->tcf_bindcnt - bind, }; struct tcf_t t; + params = rtnl_dereference(p->params); + opt.action = params->action; + opt.update_flags = params->update_flags; + if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -600,6 +620,15 @@ nla_put_failure: return -1; } +static void tcf_csum_cleanup(struct tc_action *a) +{ + struct tcf_csum *p = to_tcf_csum(a); + struct tcf_csum_params *params; + + params = rcu_dereference_protected(p->params, 1); + kfree_rcu(params, rcu); +} + static int tcf_csum_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) @@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .init = tcf_csum_init, + .cleanup = tcf_csum_cleanup, .walk = tcf_csum_walker, .lookup = tcf_csum_search, .size = sizeof(struct tcf_csum), diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e500d11da9cd..bcb4ccb5f894 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -122,7 +122,8 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) } static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, - u32 prio, struct tcf_chain *chain) + u32 prio, struct tcf_chain *chain, + struct netlink_ext_ack *extack) { struct tcf_proto *tp; int err; @@ -148,6 +149,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, module_put(tp->ops->owner); err = -EAGAIN; } else { + NL_SET_ERR_MSG(extack, "TC classifier not found"); err = -ENOENT; } goto errout; @@ -170,9 +172,10 @@ errout: return ERR_PTR(err); } -static void tcf_proto_destroy(struct tcf_proto *tp) +static void tcf_proto_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { - tp->ops->destroy(tp); + tp->ops->destroy(tp, extack); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } @@ -221,7 +224,7 @@ static void 
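The act_csum rewrite above replaces per-packet spinlocking with an RCU-managed, immutable parameter block: the datapath dereferences p->params under rcu_read_lock(), while the control path publishes a freshly allocated block with rcu_assign_pointer() and frees the old one with kfree_rcu(). The userspace sketch below is only an analogy using C11 acquire/release atomics; it has no grace period, so the deferred free is reduced to a comment:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct csum_params { int action; unsigned update_flags; };

    static _Atomic(struct csum_params *) params;

    static void reader(void)
    {
        /* kernel: rcu_read_lock() + rcu_dereference(p->params) */
        struct csum_params *p =
            atomic_load_explicit(&params, memory_order_acquire);
        printf("action=%d flags=%#x\n", p->action, p->update_flags);
    }

    static void writer(int action, unsigned flags)
    {
        struct csum_params *nw = malloc(sizeof(*nw));
        nw->action = action;
        nw->update_flags = flags;
        /* kernel: rcu_assign_pointer(p->params, nw) */
        struct csum_params *old =
            atomic_exchange_explicit(&params, nw, memory_order_acq_rel);
        free(old); /* kernel: kfree_rcu(old, rcu) after a grace period */
    }

    int main(void)
    {
        writer(1, 0x4);
        reader();
        writer(2, 0x8);
        reader();
        return 0;
    }

The payoff visible in the diff is that tcf_csum() no longer serializes every packet on tcf_lock; stats likewise move to per-CPU counters.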
tcf_chain_flush(struct tcf_chain *chain) tcf_chain_head_change(chain, NULL); while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); tp = rtnl_dereference(chain->filter_chain); tcf_chain_put(chain); } @@ -935,7 +938,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, bool unicast, bool *last) + u32 parent, void *fh, bool unicast, bool *last, + struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -947,11 +951,12 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + NL_SET_ERR_MSG(extack, "Failed to build del event notification"); kfree_skb(skb); return -EINVAL; } - err = tp->ops->delete(tp, fh, last); + err = tp->ops->delete(tp, fh, last, extack); if (err) { kfree_skb(skb); return err; @@ -960,8 +965,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (unicast) return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); + return err; } static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, @@ -1021,8 +1029,10 @@ replay: if (prio == 0) { switch (n->nlmsg_type) { case RTM_DELTFILTER: - if (protocol || t->tcm_handle || tca[TCA_KIND]) + if (protocol || t->tcm_handle || tca[TCA_KIND]) { + NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); return -ENOENT; + } break; case RTM_NEWTFILTER: /* If no priority is provided by the user, @@ -1035,6 +1045,7 @@ replay: } /* fall-through */ default: + NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } } @@ -1063,23 +1074,31 @@ replay: parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); - if (!q) + if (!q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); return -EINVAL; + } } /* Is it classful? */ cops = q->ops->cl_ops; - if (!cops) + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); return -EINVAL; + } - if (!cops->tcf_block) + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); return -EOPNOTSUPP; + } /* Do we search for filter, attached to class? */ if (TC_H_MIN(parent)) { cl = cops->find(q, parent); - if (cl == 0) + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); return -ENOENT; + } } /* And the last stroke */ @@ -1097,12 +1116,14 @@ replay: chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { + NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, n->nlmsg_type == RTM_NEWTFILTER); if (!chain) { + NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = n->nlmsg_type == RTM_NEWTFILTER ? 
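Much of the cls_api churn above is plumbing a struct netlink_ext_ack through so that each failure site can attach a human-readable string that travels back to userspace alongside the errno. The sketch below imitates the mechanism with an invented struct and macro; it is not the kernel's definition of NL_SET_ERR_MSG:

    #include <stdio.h>

    struct ext_ack { const char *msg; };

    #define SET_ERR_MSG(ea, s) do { if (ea) (ea)->msg = (s); } while (0)

    static int find_chain(unsigned index, struct ext_ack *ea)
    {
        if (index > 255) {
            SET_ERR_MSG(ea, "Specified chain index exceeds upper limit");
            return -22; /* -EINVAL */
        }
        return 0;
    }

    int main(void)
    {
        struct ext_ack ea = { 0 };
        int err = find_chain(1000, &ea);
        if (err)
            fprintf(stderr, "error %d: %s\n", err, ea.msg);
        return 0;
    }

Userspace tools that read extack can then print the message instead of a bare "Invalid argument".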
-ENOMEM : -EINVAL; goto errout; } @@ -1118,6 +1139,7 @@ replay: tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate); if (IS_ERR(tp)) { + NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); goto errout; } @@ -1126,12 +1148,14 @@ replay: /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { + NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; goto errout; } if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE)) { + NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } @@ -1140,13 +1164,14 @@ replay: prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain); + protocol, prio, chain, extack); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; } tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; } @@ -1158,13 +1183,14 @@ replay: tcf_chain_tp_remove(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_DELTFILTER, false); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, extack); err = 0; goto errout; } if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags & NLM_F_CREATE)) { + NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } @@ -1175,33 +1201,39 @@ replay: case RTM_NEWTFILTER: if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); + NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; } break; case RTM_DELTFILTER: err = tfilter_del_notify(net, skb, n, tp, block, - q, parent, fh, false, &last); + q, parent, fh, false, &last, + extack); if (err) goto errout; if (last) { tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, extack); } goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, true); + if (err < 0) + NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); goto errout; default: + NL_SET_ERR_MSG(extack, "Invalid netlink message type"); err = -EINVAL; goto errout; } } err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, - n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); + n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, + extack); if (err == 0) { if (tp_created) tcf_chain_tp_insert(chain, &chain_info, tp); @@ -1209,7 +1241,7 @@ replay: RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); } errout: @@ -1317,6 +1349,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) block = tcf_block_lookup(net, tcm->tcm_block_index); if (!block) goto out; + /* If we work with block index, q is NULL and parent value + * will never be used in the following code. The check + * in tcf_fill_node prevents it. However, compiler does not + * see that far, so set parent to zero to silence the warning + * about parent being uninitialized. 
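The comment added here (it closes with the `parent = 0` assignment at the start of the next hunk) documents a defensive initialization: the value is only read on a path that an earlier branch makes unreachable, but the compiler cannot see an invariant enforced inside another function, so it warns. A trivial illustration of the pattern, with hypothetical names:

    #include <stdio.h>

    static void fill_node(int have_qdisc, unsigned parent)
    {
        if (have_qdisc)               /* the check living in the callee */
            printf("parent=%u\n", parent);
    }

    int main(void)
    {
        int have_qdisc = 0;
        unsigned parent = 0;          /* silences -Wmaybe-uninitialized */

        if (have_qdisc)
            parent = 42;              /* only meaningful on this path */

        fill_node(have_qdisc, parent);
        return 0;
    }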
+ */ + parent = 0; } else { const struct Qdisc_class_ops *cops; struct net_device *dev; @@ -1385,7 +1424,8 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -1418,8 +1458,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } #else if ((exts->action && tb[exts->action]) || - (exts->police && tb[exts->police])) + (exts->police && tb[exts->police])) { + NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)"); return -EOPNOTSUPP; + } #endif return 0; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5f169ded347e..d333f5c5101d 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void basic_destroy(struct tcf_proto *tp) +static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; @@ -130,7 +130,8 @@ static void basic_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) +static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; @@ -152,11 +153,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); if (err < 0) return err; @@ -175,7 +177,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -221,7 +224,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, fnew->handle = idr_index; } - err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); + err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, + extack); if (err < 0) { if (!fold) idr_remove_ext(&head->handle_idr, fnew->handle); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index cf72aefcf98d..8e5326bc6440 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -147,7 +147,8 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) } static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) + struct cls_bpf_prog *oldprog, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_bpf_offload cls_bpf = {}; @@ -158,14 +159,14 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, skip_sw = prog && tc_skip_sw(prog->gen_flags); obj = prog ?: oldprog; - 
tc_cls_common_offload_init(&cls_bpf.common, tp); + tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, + extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &obj->exts; cls_bpf.prog = prog ? prog->filter : NULL; cls_bpf.oldprog = oldprog ? oldprog->filter : NULL; cls_bpf.name = obj->bpf_name; cls_bpf.exts_integrated = obj->exts_integrated; - cls_bpf.gen_flags = obj->gen_flags; if (oldprog) tcf_block_offload_dec(block, &oldprog->gen_flags); @@ -173,7 +174,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); if (prog) { if (err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog); + cls_bpf_offload_cmd(tp, oldprog, prog, extack); return err; } else if (err > 0) { tcf_block_offload_inc(block, &prog->gen_flags); @@ -186,10 +187,18 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, return 0; } +static u32 cls_bpf_flags(u32 flags) +{ + return flags & CLS_BPF_SUPPORTED_GEN_FLAGS; +} + static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, - struct cls_bpf_prog *oldprog) + struct cls_bpf_prog *oldprog, + struct netlink_ext_ack *extack) { - if (prog && oldprog && prog->gen_flags != oldprog->gen_flags) + if (prog && oldprog && + cls_bpf_flags(prog->gen_flags) != + cls_bpf_flags(oldprog->gen_flags)) return -EINVAL; if (prog && tc_skip_hw(prog->gen_flags)) @@ -199,15 +208,16 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, if (!prog && !oldprog) return 0; - return cls_bpf_offload_cmd(tp, prog, oldprog); + return cls_bpf_offload_cmd(tp, prog, oldprog, extack); } static void cls_bpf_stop_offload(struct tcf_proto *tp, - struct cls_bpf_prog *prog) + struct cls_bpf_prog *prog, + struct netlink_ext_ack *extack) { int err; - err = cls_bpf_offload_cmd(tp, NULL, prog); + err = cls_bpf_offload_cmd(tp, NULL, prog, extack); if (err) pr_err("Stopping hardware offload failed: %d\n", err); } @@ -218,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, struct tcf_block *block = tp->chain->block; struct tc_cls_bpf_offload cls_bpf = {}; - tc_cls_common_offload_init(&cls_bpf.common, tp); + tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL); cls_bpf.command = TC_CLSBPF_STATS; cls_bpf.exts = &prog->exts; cls_bpf.prog = prog->filter; cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - cls_bpf.gen_flags = prog->gen_flags; tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); } @@ -281,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) tcf_queue_work(&prog->work); } -static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) +static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); idr_remove_ext(&head->handle_idr, prog->handle); - cls_bpf_stop_offload(tp, prog); + cls_bpf_stop_offload(tp, prog, extack); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); if (tcf_exts_get_net(&prog->exts)) @@ -295,22 +305,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) __cls_bpf_delete_prog(prog); } -static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) +static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - __cls_bpf_delete(tp, arg); + 
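cls_bpf_flags() above masks gen_flags down to CLS_BPF_SUPPORTED_GEN_FLAGS before comparing an old program with its replacement. gen_flags mixes the user's skip_hw/skip_sw request with status bits the kernel sets later (the in-hardware markers maintained through tcf_block_offload_inc/dec), so a raw equality test could reject a replacement whose user-visible flags match exactly. Bit values in this sketch are invented:

    #include <stdio.h>

    #define FLAG_SKIP_HW   0x1  /* user-supplied */
    #define FLAG_SKIP_SW   0x2  /* user-supplied */
    #define FLAG_IN_HW     0x4  /* set by the kernel after offload */

    #define SUPPORTED_GEN_FLAGS (FLAG_SKIP_HW | FLAG_SKIP_SW)

    static unsigned user_flags(unsigned flags)
    {
        return flags & SUPPORTED_GEN_FLAGS;
    }

    int main(void)
    {
        unsigned oldprog = FLAG_SKIP_SW | FLAG_IN_HW; /* offloaded earlier */
        unsigned newprog = FLAG_SKIP_SW;              /* same user request */

        printf("raw compare: %s\n",
               oldprog == newprog ? "match" : "mismatch");
        printf("masked compare: %s\n",
               user_flags(oldprog) == user_flags(newprog) ?
               "match" : "mismatch");
        return 0;
    }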
__cls_bpf_delete(tp, arg, extack); *last = list_empty(&head->plist); return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp) +static void cls_bpf_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; list_for_each_entry_safe(prog, tmp, &head->plist, link) - __cls_bpf_delete(tp, prog); + __cls_bpf_delete(tp, prog, extack); idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); @@ -403,7 +415,8 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, - struct nlattr **tb, struct nlattr *est, bool ovr) + struct nlattr **tb, struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { bool is_bpf, is_ebpf, have_exts = false; u32 gen_flags = 0; @@ -414,7 +427,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack); if (ret < 0) return ret; @@ -452,7 +465,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *oldprog = *arg; @@ -500,11 +513,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->handle = handle; } - ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr, + extack); if (ret < 0) goto errout_idr; - ret = cls_bpf_offload(tp, prog, oldprog); + ret = cls_bpf_offload(tp, prog, oldprog, extack); if (ret) goto errout_parms; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 309d5899265f..762da5c0cf5e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -91,7 +91,8 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -121,7 +122,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, + extack); if (err < 0) goto errout; @@ -141,7 +143,8 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp) +static void cls_cgroup_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -154,7 +157,8 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) } } -static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) +static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 28cd6fb52c16..cd5fe383afdd 100644 --- a/net/sched/cls_flow.c 
+++ b/net/sched/cls_flow.c @@ -401,7 +401,7 @@ static void flow_destroy_filter(struct rcu_head *head) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; @@ -454,7 +454,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto err2; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, + extack); if (err < 0) goto err2; @@ -574,7 +575,8 @@ err1: return err; } -static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) +static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; @@ -598,7 +600,7 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp) +static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f61df19b1026..dc9acaafc0a8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -218,12 +218,13 @@ static void fl_destroy_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_flower.common, tp); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; @@ -235,14 +236,15 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) static int fl_hw_replace_filter(struct tcf_proto *tp, struct flow_dissector *dissector, struct fl_flow_key *mask, - struct cls_fl_filter *f) + struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); int err; - tc_cls_common_offload_init(&cls_flower.common, tp); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; cls_flower.dissector = dissector; @@ -254,7 +256,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); if (err < 0) { - fl_hw_destroy_filter(tp, f); + fl_hw_destroy_filter(tp, f, NULL); return err; } else if (err > 0) { tcf_block_offload_inc(block, &f->flags); @@ -271,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_flower.common, tp); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; @@ -281,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter 
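cls_flower here follows the same shape as cls_bpf: tc_cls_common_offload_init() now receives the filter flags and an extack, instead of each per-classifier struct carrying gen_flags itself. The mock below shows my reading of why the flags matter to the helper, namely that the extack is presumably only worth handing to drivers when software fallback is disabled and a hardware failure is therefore fatal to the request; the types and the FLAG_SKIP_SW bit value are invented:

    #include <stdio.h>

    #define FLAG_SKIP_SW 0x2              /* invented bit value */

    struct ext_ack { const char *msg; };

    struct cls_common {
        unsigned chain_index;
        unsigned protocol;
        unsigned prio;
        struct ext_ack *extack;           /* new: error channel for drivers */
    };

    static void common_offload_init(struct cls_common *c, unsigned chain,
                                    unsigned proto, unsigned prio,
                                    unsigned flags, struct ext_ack *extack)
    {
        c->chain_index = chain;
        c->protocol = proto;
        c->prio = prio;
        /* assumption: only expose extack when there is no software
         * fallback, i.e. when a hardware failure must reach the user */
        if (flags & FLAG_SKIP_SW)
            c->extack = extack;
    }

    int main(void)
    {
        struct ext_ack ea = { 0 };
        struct cls_common common = { 0 };

        common_offload_init(&common, 0, 0x0800, 1, FLAG_SKIP_SW, &ea);
        printf("prio=%u extack set: %s\n", common.prio,
               common.extack ? "yes" : "no");
        return 0;
    }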
*f) &cls_flower, false); } -static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) +static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) - fl_hw_destroy_filter(tp, f); + fl_hw_destroy_filter(tp, f, extack); tcf_unbind_filter(tp, &f->res); if (tcf_exts_get_net(&f->exts)) call_rcu(&f->rcu, fl_destroy_filter); @@ -314,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu) schedule_work(&head->work); } -static void fl_destroy(struct tcf_proto *tp) +static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, list) - __fl_delete(tp, f); + __fl_delete(tp, f, extack); idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); @@ -526,13 +529,14 @@ static void fl_set_key_ip(struct nlattr **tb, } static int fl_set_key(struct net *net, struct nlattr **tb, - struct fl_flow_key *key, struct fl_flow_key *mask) + struct fl_flow_key *key, struct fl_flow_key *mask, + struct netlink_ext_ack *extack) { __be16 ethertype; int ret = 0; #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FLOWER_INDEV]) { - int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); + int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack); if (err < 0) return err; key->indev_ifindex = err; @@ -827,11 +831,12 @@ static int fl_check_assign_mask(struct cls_fl_head *head, static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); if (err < 0) return err; @@ -840,7 +845,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &f->res, base); } - err = fl_set_key(net, tb, &f->key, &mask->key); + err = fl_set_key(net, tb, &f->key, &mask->key, extack); if (err) return err; @@ -853,7 +858,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; @@ -916,7 +921,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, + extack); if (err) goto errout_idr; @@ -940,7 +946,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, err = fl_hw_replace_filter(tp, &head->dissector, &mask.key, - fnew); + fnew, + extack); if (err) goto errout_idr; } @@ -953,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, rhashtable_remove_fast(&head->ht, &fold->ht_node, head->ht_params); if (!tc_skip_hw(fold->flags)) - fl_hw_destroy_filter(tp, fold); + fl_hw_destroy_filter(tp, fold, NULL); } *arg = fnew; @@ -983,7 +990,8 @@ errout_tb: return err; } -static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) +static int 
fl_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; @@ -991,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) if (!tc_skip_sw(f->flags)) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); - __fl_delete(tp, f); + __fl_delete(tp, f, extack); *last = list_empty(&head->filters); return 0; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 20f0de1a960a..8b207723fbc2 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void fw_destroy(struct tcf_proto *tp) +static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; @@ -172,7 +172,8 @@ static void fw_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) +static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; @@ -218,13 +219,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_set_parms(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, - struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tca, unsigned long base, bool ovr, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, + extack); if (err < 0) return err; @@ -236,7 +239,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FW_INDEV]) { int ret; - ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); + ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); if (ret < 0) return ret; f->ifindex = ret; @@ -257,7 +260,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr) + bool ovr, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -296,7 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr); + err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -345,7 +348,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_set_parms(net, tp, f, tb, tca, base, ovr); + err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d0e57c86636f..2ba721a590a7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -71,12 +71,13 @@ static void mall_destroy_rcu(struct rcu_head *rcu) static void mall_destroy_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, - unsigned long cookie) + unsigned long cookie, + struct netlink_ext_ack *extack) { struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_mall.common, tp); + 
tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; @@ -86,14 +87,15 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, static int mall_replace_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, - unsigned long cookie) + unsigned long cookie, + struct netlink_ext_ack *extack) { struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(head->flags); int err; - tc_cls_common_offload_init(&cls_mall.common, tp); + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.exts = &head->exts; cls_mall.cookie = cookie; @@ -101,7 +103,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); if (err < 0) { - mall_destroy_hw_filter(tp, head, cookie); + mall_destroy_hw_filter(tp, head, cookie, NULL); return err; } else if (err > 0) { tcf_block_offload_inc(block, &head->flags); @@ -113,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp) +static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -121,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp) return; if (!tc_skip_hw(head->flags)) - mall_destroy_hw_filter(tp, head, (unsigned long) head); + mall_destroy_hw_filter(tp, head, (unsigned long) head, extack); if (tcf_exts_get_net(&head->exts)) call_rcu(&head->rcu, mall_destroy_rcu); @@ -142,11 +144,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack); if (err < 0) return err; @@ -160,7 +163,7 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; @@ -198,12 +201,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, new->handle = handle; new->flags = flags; - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr); + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, + extack); if (err) goto err_set_parms; if (!tc_skip_hw(new->flags)) { - err = mall_replace_hw_filter(tp, new, (unsigned long) new); + err = mall_replace_hw_filter(tp, new, (unsigned long)new, + extack); if (err) goto err_replace_hw_filter; } @@ -223,7 +228,8 @@ err_exts_init: return err; } -static int mall_delete(struct tcf_proto *tp, void *arg, bool *last) +static int mall_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a1f2b1b7c014..21a03a8ee029 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head) 
tcf_queue_work(&f->work); } -static void route4_destroy(struct tcf_proto *tp) +static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; @@ -316,7 +316,8 @@ static void route4_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) +static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = arg; @@ -389,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, struct nlattr **tb, struct nlattr *est, int new, - bool ovr) + bool ovr, struct netlink_ext_ack *extack) { u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; @@ -397,7 +398,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); if (err < 0) return err; @@ -471,7 +472,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -515,7 +517,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], new, ovr); + tca[TCA_RATE], new, ovr, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index cf325625c99d..4f1297657c27 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp) +static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; @@ -350,7 +350,8 @@ static void rsvp_destroy(struct tcf_proto *tp) kfree_rcu(data, rcu); } -static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last) +static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = arg; @@ -486,7 +487,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr) + void **arg, bool ovr, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -511,7 +512,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); if (err < 0) goto errout2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 67467ae24c97..b49cc990a000 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -193,7 +193,8 @@ static void 
tcindex_destroy_fexts(struct rcu_head *head) tcf_queue_work(&f->work); } -static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) +static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = arg; @@ -246,7 +247,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, { bool last; - return tcindex_delete(tp, arg, &last); + return tcindex_delete(tp, arg, &last, NULL); } static void __tcindex_destroy(struct rcu_head *head) @@ -322,7 +323,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; @@ -334,7 +335,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); if (err < 0) goto errout; @@ -520,7 +521,8 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -540,7 +542,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], ovr); + tca[TCA_RATE], ovr, extack); } static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) @@ -579,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp) +static void tcindex_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 020d328d0afd..60c892c36a60 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -87,6 +87,7 @@ struct tc_u_hnode { unsigned int divisor; struct idr handle_idr; struct rcu_head rcu; + u32 flags; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. 
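The new `u32 flags` member in tc_u_hnode exists because hash-table nodes, unlike key nodes, kept no copy of the flags they were created with, leaving the teardown path nothing to pass to the offload-init helper. A reduction to the relevant fields; the names mirror the diff, but the code is not the kernel's:

    #include <stdio.h>
    #include <stdlib.h>

    struct hnode {
        unsigned handle;
        unsigned divisor;
        unsigned flags;               /* new: remembered for teardown */
    };

    static struct hnode *hnode_create(unsigned handle, unsigned divisor,
                                      unsigned flags)
    {
        struct hnode *ht = calloc(1, sizeof(*ht));
        if (!ht)
            return NULL;
        ht->handle = handle;
        ht->divisor = divisor;
        ht->flags = flags;            /* captured once, at creation */
        return ht;
    }

    static void hnode_destroy(struct hnode *ht)
    {
        /* offload teardown can now be initialized with ht->flags */
        printf("destroy handle=%#x flags=%#x\n", ht->handle, ht->flags);
        free(ht);
    }

    int main(void)
    {
        struct hnode *ht = hnode_create(0x10000, 255, 0x2);
        if (ht)
            hnode_destroy(ht);
        return 0;
    }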
*/ @@ -486,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -501,7 +503,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, - u32 flags) + u32 flags, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; @@ -509,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, bool offloaded = false; int err; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -517,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { - u32_clear_hw_hnode(tp, h); + u32_clear_hw_hnode(tp, h, NULL); return err; } else if (err > 0) { offloaded = true; @@ -529,12 +531,13 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, return 0; } -static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n) +static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; @@ -543,14 +546,14 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n) } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, - u32 flags) + u32 flags, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; bool skip_sw = tc_skip_sw(flags); int err; - tc_cls_common_offload_init(&cls_u32.common, tp); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; cls_u32.knode.fshift = n->fshift; @@ -568,7 +571,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { - u32_remove_hw_knode(tp, n); + u32_remove_hw_knode(tp, n, NULL); return err; } else if (err > 0) { tcf_block_offload_inc(block, &n->flags); @@ -580,7 +583,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, return 0; } -static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + struct netlink_ext_ack *extack) { struct tc_u_knode *n; unsigned int h; @@ -590,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) RCU_INIT_POINTER(ht->ht[h], 
rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); - u32_remove_hw_knode(tp, n); + u32_remove_hw_knode(tp, n, extack); idr_remove_ext(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) call_rcu(&n->rcu, u32_delete_key_freepf_rcu); @@ -600,7 +604,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) } } -static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode __rcu **hn; @@ -608,14 +613,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) WARN_ON(ht->refcnt); - u32_clear_hnode(tp, ht); + u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { - u32_clear_hw_hnode(tp, ht); + u32_clear_hw_hnode(tp, ht, extack); idr_destroy(&ht->handle_idr); idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); @@ -638,7 +643,7 @@ static bool ht_empty(struct tc_u_hnode *ht) return true; } -static void u32_destroy(struct tcf_proto *tp) +static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -646,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp) WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) - u32_destroy_hnode(tp, root_ht); + u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; @@ -657,7 +662,7 @@ static void u32_destroy(struct tcf_proto *tp) ht; ht = rtnl_dereference(ht->next)) { ht->refcnt--; - u32_clear_hnode(tp, ht); + u32_clear_hnode(tp, ht, extack); } while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { @@ -672,7 +677,8 @@ static void u32_destroy(struct tcf_proto *tp) tp->data = NULL; } -static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) +static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, + struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -683,18 +689,21 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) goto out; if (TC_U32_KEY(ht->handle)) { - u32_remove_hw_knode(tp, (struct tc_u_knode *)ht); + u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); ret = u32_delete_key(tp, (struct tc_u_knode *)ht); goto out; } - if (root_ht == ht) + if (root_ht == ht) { + NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); return -EINVAL; + } if (ht->refcnt == 1) { ht->refcnt--; - u32_destroy_hnode(tp, ht); + u32_destroy_hnode(tp, ht, extack); } else { + NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); return -EBUSY; } @@ -765,11 +774,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_hnode *ht, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, bool ovr) + struct nlattr *est, bool ovr, + struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); if (err < 0) return err; @@ -777,14 +787,18 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; - if (TC_U32_KEY(handle)) + if 
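The u32 hunks use NL_SET_ERR_MSG_MOD rather than the plain NL_SET_ERR_MSG seen in cls_api. My understanding is that the _MOD variant prefixes the message with the module name (KBUILD_MODNAME) so userspace can tell which classifier produced the complaint; the macros below imitate that with an invented MODNAME:

    #include <stdio.h>

    #define MODNAME "cls_u32"         /* kernel: KBUILD_MODNAME */

    struct ext_ack { const char *msg; };

    #define SET_ERR_MSG(ea, s)      do { if (ea) (ea)->msg = (s); } while (0)
    #define SET_ERR_MSG_MOD(ea, s)  SET_ERR_MSG(ea, MODNAME ": " s)

    int main(void)
    {
        struct ext_ack ea = { 0 };

        SET_ERR_MSG_MOD(&ea, "Not allowed to delete root node");
        printf("%s\n", ea.msg);      /* "cls_u32: Not allowed to ..." */
        return 0;
    }

The string pasting works because both halves are literals, which is also why these messages must be static strings rather than formatted at runtime.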
(TC_U32_KEY(handle)) { + NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table"); return -EINVAL; + } if (handle) { ht_down = u32_lookup_ht(ht->tp_c, handle); - if (ht_down == NULL) + if (!ht_down) { + NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); return -EINVAL; + } ht_down->refcnt++; } @@ -802,7 +816,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, #ifdef CONFIG_NET_CLS_IND if (tb[TCA_U32_INDEV]) { int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); + ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); if (ret < 0) return -EINVAL; n->ifindex = ret; @@ -893,7 +907,8 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -907,28 +922,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, size_t size; #endif - if (opt == NULL) - return handle ? -EINVAL : 0; + if (!opt) { + if (handle) { + NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options"); + return -EINVAL; + } else { + return 0; + } + } - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL); + err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); if (err < 0) return err; if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); - if (!tc_flags_valid(flags)) + if (!tc_flags_valid(flags)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); return -EINVAL; + } } n = *arg; if (n) { struct tc_u_knode *new; - if (TC_U32_KEY(n->handle) == 0) + if (TC_U32_KEY(n->handle) == 0) { + NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero"); return -EINVAL; + } - if (n->flags != flags) + if (n->flags != flags) { + NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; + } new = u32_init_knode(tp, n); if (!new) @@ -936,14 +963,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, err = u32_set_parms(net, tp, base, rtnl_dereference(n->ht_up), new, tb, - tca[TCA_RATE], ovr); + tca[TCA_RATE], ovr, extack); if (err) { u32_destroy_key(tp, new, false); return err; } - err = u32_replace_hw_knode(tp, new, flags); + err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { u32_destroy_key(tp, new, false); return err; @@ -962,10 +989,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_DIVISOR]) { unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); - if (--divisor > 0x100) + if (--divisor > 0x100) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); return -EINVAL; - if (TC_U32_KEY(handle)) + } + if (TC_U32_KEY(handle)) { + NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table"); return -EINVAL; + } ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; @@ -989,8 +1020,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); + ht->flags = flags; - err = u32_replace_hw_hnode(tp, ht, flags); + err = u32_replace_hw_hnode(tp, ht, flags, extack); if (err) { idr_remove_ext(&tp_c->handle_idr, handle); kfree(ht); @@ -1011,20 +1043,26 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, htid = ht->handle; } else { ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); - if (ht == NULL) + if (!ht) { + 
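The divisor check that gains the "Exceeded maximum 256 hash buckets" message packs two validations into one unsigned comparison: after `--divisor`, a user value of 0 wraps around and is rejected by the same `> 0x100` bound that caps the table size. Counting the decrement, the largest accepted input is actually 257, so the message rounds slightly. A standalone demonstration of the idiom:

    #include <stdio.h>

    static int check_divisor(unsigned divisor)
    {
        if (--divisor > 0x100)
            return -1;            /* rejects 0 (wraps) and large values */
        return 0;
    }

    int main(void)
    {
        unsigned tests[] = { 0, 1, 256, 257, 258 };

        for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
            printf("divisor %u -> %s\n", tests[i],
                   check_divisor(tests[i]) ? "rejected" : "ok");
        return 0;
    }

This prints "rejected" for 0 and 258, and "ok" for 1, 256, and 257.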
NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found"); return -EINVAL; + } } } else { ht = rtnl_dereference(tp->root); htid = ht->handle; } - if (ht->divisor < TC_U32_HASH(htid)) + if (ht->divisor < TC_U32_HASH(htid)) { + NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value"); return -EINVAL; + } if (handle) { - if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) + if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { + NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; + } handle = htid | TC_U32_NODE(handle); err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, handle, handle + 1, @@ -1035,6 +1073,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, handle = gen_new_kid(ht, htid); if (tb[TCA_U32_SEL] == NULL) { + NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); err = -EINVAL; goto erridr; } @@ -1083,12 +1122,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); + err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr, + extack); if (err == 0) { struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; - err = u32_replace_hw_knode(tp, n, flags); + err = u32_replace_hw_knode(tp, n, flags, extack); if (err) goto errhw; diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index df3110d69585..07c10bac06a0 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em, if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) return 0; - return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); + return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len); } static struct tcf_ematch_ops em_nbyte_ops = { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ef8b4ecde2ac..1816bde47256 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -510,7 +510,7 @@ void netif_carrier_on(struct net_device *dev) if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; - atomic_inc(&dev->carrier_changes); + atomic_inc(&dev->carrier_up_count); linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -529,7 +529,7 @@ void netif_carrier_off(struct net_device *dev) if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; - atomic_inc(&dev->carrier_changes); + atomic_inc(&dev->carrier_down_count); linkwatch_fire_event(dev); } } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index a398502899a9..efbf51f35778 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -254,11 +254,15 @@ static int prio_dump_offload(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload hw_stats = { + .command = TC_PRIO_STATS, .handle = sch->handle, .parent = sch->parent, - .command = TC_PRIO_STATS, - .stats.bstats = &sch->bstats, - .stats.qstats = &sch->qstats, + { + .stats = { + .bstats = &sch->bstats, + .qstats = &sch->qstats, + }, + }, }; int err; diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 275925b93b29..35bc7106d182 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, 
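The sch_generic change splits the old carrier_changes counter into separate up and down counts, matching the new sysfs attributes. A tiny userspace probe for the resulting files (eth0 is illustrative; substitute your interface):

#include <stdio.h>

int main(void)
{
	const char *files[] = {
		"/sys/class/net/eth0/carrier_up_count",
		"/sys/class/net/eth0/carrier_down_count",
	};

	for (int i = 0; i < 2; i++) {
		FILE *f = fopen(files[i], "r");
		unsigned int v;

		if (f && fscanf(f, "%u", &v) == 1)
			printf("%s = %u\n", files[i], v);
		if (f)
			fclose(f);
	}
	return 0;
}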
sizeof(*sh))) goto out; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7ff444ecee75..a40fa53c93ef 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4860,9 +4860,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), struct net *net, int *pos, void *p) { struct rhashtable_iter hti; struct sctp_transport *tsp; - int ret = 0; + int ret; again: + ret = 0; sctp_transport_walk_start(&hti); tsp = sctp_transport_get_idx(net, &hti, *pos + 1); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index daf8075f5a4c..cf0e11978b66 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -377,6 +377,15 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { @@ -513,6 +522,8 @@ out_connected: return rc ? rc : local_contact; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); decline_rdma: @@ -526,6 +537,8 @@ decline_rdma: goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); out_err: @@ -581,39 +594,32 @@ out_err: static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) { - struct sock *sk = &lsmc->sk; - struct socket *new_clcsock; + struct socket *new_clcsock = NULL; + struct sock *lsk = &lsmc->sk; struct sock *new_sk; int rc; - release_sock(&lsmc->sk); - new_sk = smc_sock_alloc(sock_net(sk), NULL); + release_sock(lsk); + new_sk = smc_sock_alloc(sock_net(lsk), NULL); if (!new_sk) { rc = -ENOMEM; - lsmc->sk.sk_err = ENOMEM; + lsk->sk_err = ENOMEM; *new_smc = NULL; - lock_sock(&lsmc->sk); + lock_sock(lsk); goto out; } *new_smc = smc_sk(new_sk); rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); - lock_sock(&lsmc->sk); - if (rc < 0) { - lsmc->sk.sk_err = -rc; - new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); - *new_smc = NULL; - goto out; - } - if (lsmc->sk.sk_state == SMC_CLOSED) { + lock_sock(lsk); + if (rc < 0) + lsk->sk_err = -rc; + if (rc < 0 || lsk->sk_state == SMC_CLOSED) { if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); + new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); *new_smc = NULL; goto out; @@ -913,6 +919,8 @@ enqueue: return; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ @@ -925,6 +933,8 @@ decline_rdma: goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); out_err: newsmcsk->sk_state = SMC_CLOSED; @@ -936,11 +946,12 @@ static void smc_tcp_listen_work(struct work_struct *work) { struct smc_sock *lsmc = container_of(work, struct smc_sock, tcp_listen_work); + struct sock *lsk = &lsmc->sk; struct smc_sock *new_smc; int rc = 0; - lock_sock(&lsmc->sk); - while (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock(lsk); + while 
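The sctp_for_each_transport() fix is a one-liner, but the pitfall generalizes: any value returned to the caller must be re-initialized on every pass through a retry label, or an error from an aborted pass can leak out of a restart that otherwise completes cleanly. A toy illustration of the leak the reset prevents:

#include <errno.h>

static int visit(int pos)
{
	return pos == 3 ? -EAGAIN : 0;	/* last element forces a restart */
}

int walk(void)
{
	int pos = 0;
	int ret;
again:
	ret = 0;	/* without this reset, the -EAGAIN below leaks out */
	while (pos < 4) {
		ret = visit(pos);
		if (ret == -EAGAIN) {
			pos++;		/* skip the offender and restart */
			goto again;
		}
		if (ret)
			break;
		pos++;
	}
	/* the restarted pass may find nothing left to do, so the loop
	 * body never overwrites ret -- the per-pass reset is what keeps
	 * the stale -EAGAIN from reaching the caller */
	return ret;
}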
(lsk->sk_state == SMC_LISTEN) { rc = smc_clcsock_accept(lsmc, &new_smc); if (rc) goto out; @@ -949,15 +960,15 @@ static void smc_tcp_listen_work(struct work_struct *work) new_smc->listen_smc = lsmc; new_smc->use_fallback = false; /* assume rdma capability first*/ - sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ + sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); schedule_work(&new_smc->smc_listen_work); } out: - release_sock(&lsmc->sk); - lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ + release_sock(lsk); + lsk->sk_data_ready(lsk); /* no more listening, wake accept */ } static int smc_listen(struct socket *sock, int backlog) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index d4155ff6acde..6e8f5fbe0f09 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, cdcpend->conn); } smc_tx_sndbuf_nonfull(smc); - if (smc->sk.sk_state != SMC_ACTIVE) - /* wake up smc_close_wait_tx_pends() */ - smc->sk.sk_state_change(&smc->sk); bh_unlock_sock(&smc->sk); } @@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + int rc; - return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, - (struct smc_wr_tx_pend_priv **)pend); + rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + (struct smc_wr_tx_pend_priv **)pend); + if (!conn->alert_token_local) + /* abnormal termination */ + rc = -EPIPE; + return rc; } static inline void smc_cdc_add_pending_send(struct smc_connection *conn, @@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) (unsigned long)conn); } -bool smc_cdc_tx_has_pending(struct smc_connection *conn) -{ - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; - - return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, (unsigned long)conn); -} - /********************************* receive ***********************************/ static inline bool smc_cdc_before(u16 seq1, u16 seq2) diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 149ceda1b088..ab240b37ad11 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); -bool smc_cdc_tx_has_pending(struct smc_connection *conn); int smc_cdc_init(void) __init; #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index e194c6cc308a..babe05d385e7 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -19,8 +19,6 @@ #include "smc_cdc.h" #include "smc_close.h" -#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ) - static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -30,26 +28,6 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_tx_pends(struct smc_sock *smc) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sock *sk = &smc->sk; - signed long timeout; - - timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; - add_wait_queue(sk_sleep(sk), &wait); - while (!signal_pending(current) && timeout) { - int rc; - - rc = sk_wait_event(sk, &timeout, - !smc_cdc_tx_has_pending(&smc->conn), - &wait); - if (rc) - break; - } - 
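smc_cdc_get_free_slot() now re-checks conn->alert_token_local after acquiring a slot, turning a race with abnormal link-group termination into a clean -EPIPE for the caller. The shape, with the smc types reduced to hypothetical stand-ins:

#include <errno.h>

struct conn {
	unsigned int alert_token_local;	/* cleared on abnormal termination */
};

static int take_slot(struct conn *c) { (void)c; return 0; }

int get_free_slot(struct conn *c)
{
	int rc = take_slot(c);

	/* the group may have been torn down while we waited for a slot */
	if (!c->alert_token_local)
		rc = -EPIPE;	/* abnormal termination */
	return rc;
}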
remove_wait_queue(sk_sleep(sk), &wait); -} - /* wait for sndbuf data being transmitted */ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) { @@ -111,49 +89,53 @@ static int smc_close_abort(struct smc_connection *conn) } /* terminate smc socket abnormally - active abort - * RDMA communication no longer possible + * link group is terminated, i.e. RDMA communication no longer possible */ static void smc_close_active_abort(struct smc_sock *smc) { + struct sock *sk = &smc->sk; + struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - smc->sk.sk_err = ECONNABORTED; + sk->sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { smc->clcsock->sk->sk_err = ECONNABORTED; smc->clcsock->sk->sk_state_change(smc->clcsock->sk); } - switch (smc->sk.sk_state) { + switch (sk->sk_state) { case SMC_INIT: case SMC_ACTIVE: - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - txflags->peer_conn_abort = 1; sock_release(smc->clcsock); if (!smc_cdc_rxed_any_close(&smc->conn)) - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; else - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (!txflags->peer_conn_closed) { - smc->sk.sk_state = SMC_PEERABORTWAIT; - txflags->peer_conn_abort = 1; + sk->sk_state = SMC_PEERABORTWAIT; sock_release(smc->clcsock); } else { - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; } break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: - if (!txflags->peer_conn_closed) { - txflags->peer_conn_abort = 1; + if (!txflags->peer_conn_closed) sock_release(smc->clcsock); - } - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: case SMC_PEERABORTWAIT: @@ -161,8 +143,8 @@ static void smc_close_active_abort(struct smc_sock *smc) break; } - sock_set_flag(&smc->sk, SOCK_DEAD); - smc->sk.sk_state_change(&smc->sk); + sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } static inline bool smc_close_sent_any_close(struct smc_connection *conn) @@ -185,9 +167,9 @@ int smc_close_active(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? 
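Several smc_close paths above grow a release_sock()/cancel_delayed_work_sync()/lock_sock() sandwich. The ordering matters: tx_work takes the socket lock itself, so cancelling it synchronously while holding that lock could deadlock, and because the lock is dropped, every caller re-validates sk->sk_state afterwards (the goto again paths below). As a kernel-side sketch:

#include <linux/workqueue.h>
#include <net/sock.h>

static void quiesce_tx_work(struct sock *sk, struct delayed_work *tx_work)
{
	release_sock(sk);		/* the worker locks the socket too */
	cancel_delayed_work_sync(tx_work);
	lock_sock(sk);
	/* caller must re-check sk->sk_state: it may have moved on */
}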
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_INIT: sk->sk_state = SMC_CLOSED; if (smc->smc_listen_work.func) @@ -214,6 +196,8 @@ again: if (sk->sk_state == SMC_ACTIVE) { /* send close request */ rc = smc_close_final(conn); + if (rc) + break; sk->sk_state = SMC_PEERCLOSEWAIT1; } else { /* peer event has changed the state */ @@ -226,9 +210,10 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: @@ -237,19 +222,19 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); - if (sk->sk_err != ECONNABORTED) { - /* confirm close from peer */ - rc = smc_close_final(conn); - if (rc) - break; - } + if (sk->sk_state != SMC_APPCLOSEWAIT1 && + sk->sk_state != SMC_APPCLOSEWAIT2) + goto again; + /* confirm close from peer */ + rc = smc_close_final(conn); + if (rc) + break; if (smc_cdc_rxed_any_close(conn)) /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; else /* peer has just issued a shutdown write */ sk->sk_state = SMC_PEERFINCLOSEWAIT; - smc_close_wait_tx_pends(smc); break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: @@ -257,6 +242,8 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } /* peer sending PeerConnectionClosed will cause transition */ break; @@ -264,12 +251,8 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - release_sock(sk); - cancel_delayed_work_sync(&conn->tx_work); - lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_PEERABORTWAIT: case SMC_CLOSED: @@ -278,7 +261,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } @@ -293,7 +276,6 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) case SMC_APPFINCLOSEWAIT: case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; break; case SMC_PEERCLOSEWAIT1: @@ -301,7 +283,6 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) if (txflags->peer_done_writing && !smc_close_sent_any_close(&smc->conn)) { /* just shutdown, but not yet closed locally */ - smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; } else { sk->sk_state = SMC_CLOSED; @@ -318,8 +299,9 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) } } -/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, - * or peer_done_writing. +/* Either some kind of closing has been received: peer_conn_closed, + * peer_conn_abort, or peer_done_writing + * or the link group of the connection terminates abnormally. 
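Another recurring fix in smc_close_active(): the state transition now happens only after smc_close_final() reports success, so a failed close message no longer leaves the socket pretending the peer was notified. A userspace toy of the same discipline (names hypothetical):

#include <errno.h>

enum sk_state { ST_ACTIVE, ST_PEERCLOSEWAIT1 };

struct tsk { enum sk_state state; };

static int send_close_msg(struct tsk *t)
{
	(void)t;
	return 0;	/* could be -ENOBUFS etc. on a dead link */
}

int active_close(struct tsk *t)
{
	int rc = send_close_msg(t);

	if (rc)
		return rc;	/* peer was never told: keep the old state */
	t->state = ST_PEERCLOSEWAIT1;	/* transition only after success */
	return 0;
}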
*/ static void smc_close_passive_work(struct work_struct *work) { @@ -331,7 +313,7 @@ static void smc_close_passive_work(struct work_struct *work) struct sock *sk = &smc->sk; int old_state; - lock_sock(&smc->sk); + lock_sock(sk); old_state = sk->sk_state; if (!conn->alert_token_local) { @@ -340,15 +322,19 @@ static void smc_close_passive_work(struct work_struct *work) goto wakeup; } - rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; + rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { + /* peer has not received all data */ smc_close_passive_abort_received(smc); + release_sock(&smc->sk); + cancel_delayed_work_sync(&conn->tx_work); + lock_sock(&smc->sk); goto wakeup; } switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&smc->conn.bytes_to_rcv) || + if (atomic_read(&conn->bytes_to_rcv) || (rxflags->peer_done_writing && !smc_cdc_rxed_any_close(conn))) sk->sk_state = SMC_APPCLOSEWAIT1; @@ -364,8 +350,7 @@ static void smc_close_passive_work(struct work_struct *work) /* fall through */ /* to check for closing */ case SMC_PEERCLOSEWAIT2: - case SMC_PEERFINCLOSEWAIT: - if (!smc_cdc_rxed_any_close(&smc->conn)) + if (!smc_cdc_rxed_any_close(conn)) break; if (sock_flag(sk, SOCK_DEAD) && smc_close_sent_any_close(conn)) { @@ -376,6 +361,10 @@ static void smc_close_passive_work(struct work_struct *work) sk->sk_state = SMC_APPFINCLOSEWAIT; } break; + case SMC_PEERFINCLOSEWAIT: + if (smc_cdc_rxed_any_close(conn)) + sk->sk_state = SMC_CLOSED; + break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: case SMC_APPFINCLOSEWAIT: @@ -394,12 +383,12 @@ wakeup: sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { - smc_conn_free(&smc->conn); + smc_conn_free(conn); schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); } } - release_sock(&smc->sk); + release_sock(sk); } void smc_close_sock_put_work(struct work_struct *work) @@ -424,20 +413,21 @@ int smc_close_shutdown_write(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? 
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_ACTIVE) + goto again; /* send close wr request */ rc = smc_close_wr(conn); - if (sk->sk_state == SMC_ACTIVE) - sk->sk_state = SMC_PEERCLOSEWAIT1; - else - goto again; + if (rc) + break; + sk->sk_state = SMC_PEERCLOSEWAIT1; break; case SMC_APPCLOSEWAIT1: /* passive close */ @@ -446,8 +436,12 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_APPCLOSEWAIT1) + goto again; /* confirm close from peer */ rc = smc_close_wr(conn); + if (rc) + break; sk->sk_state = SMC_APPCLOSEWAIT2; break; case SMC_APPCLOSEWAIT2: @@ -462,7 +456,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 94f21116dac5..ed5b46d1fe41 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); + if (list_empty(&lgr->list)) + goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work) return; } list_del_init(&lgr->list); /* remove from smc_lgr_list */ +free: spin_unlock_bh(&smc_lgr_list.lock); smc_lgr_free(lgr); } @@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn) /* remove a finished connection from its link group */ void smc_conn_free(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; - - if (!lgr) + if (!conn->lgr) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); @@ -328,12 +329,16 @@ void smc_lgr_terminate(struct smc_link_group *lgr) conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); sock_hold(&smc->sk); + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); + write_unlock_bh(&lgr->conns_lock); schedule_work(&conn->close_work); + write_lock_bh(&lgr->conns_lock); sock_put(&smc->sk); node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); + wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); } /* Determine vlan of internal TCP socket. 
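With smc_lgr_forget() now able to unlink a link group early, smc_lgr_free_work() has to tolerate finding the group already off the list; the list_empty() check under the lock prevents a double unlink. Note that list_del_init() (rather than list_del()) is what makes a later list_empty() test on the entry meaningful. The idiom, reduced:

#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(lgr_list_lock);

static void free_work(struct list_head *entry)
{
	spin_lock_bh(&lgr_list_lock);
	if (list_empty(entry))		/* already forgotten elsewhere */
		goto free;
	list_del_init(entry);		/* re-init so the test stays valid */
free:
	spin_unlock_bh(&lgr_list_lock);
	/* the actual freeing happens after the lock is dropped */
}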
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..427b91c1c964 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; - if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && + smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, @@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && + !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 2e50fddf8ce9..838bce20c361 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) rc = -EPIPE; break; } - if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { + if (smc_cdc_rxed_any_close(conn)) { rc = -ECONNRESET; break; } @@ -107,7 +107,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || - smc_cdc_rxed_any_close_or_senddone(conn) || + smc_cdc_rxed_any_close(conn) || atomic_read(&conn->sndbuf_space), &wait); } @@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, peer_rmbe_offset; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); - if (rc) + if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + smc_lgr_terminate(lgr); + } return rc; } @@ -406,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) goto out_unlock; } rc = 0; - schedule_delayed_work(&conn->tx_work, - SMC_TX_WORK_DELAY); + if (conn->alert_token_local) /* connection healthy */ + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } @@ -438,10 +441,17 @@ static void smc_tx_work(struct work_struct *work) int rc; lock_sock(&smc->sk); + if (smc->sk.sk_err || + !conn->alert_token_local || + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + goto out; + rc = smc_tx_sndbuf_nonempty(conn); if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) conn->local_rx_ctrl.prod_flags.write_blocked = 0; + +out: release_sock(&smc->sk); } @@ -462,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - if (smc_cdc_get_slot_and_msg_send(conn) < 0) { + if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && + conn->alert_token_local) { /* connection healthy */ schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..1b8af23e6e2b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data) again: polled++; do { + memset(&wc, 0, sizeof(wc)); rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); if (polled == 1) { ib_req_notify_cq(dev->roce_cq_send, @@ -173,9 +174,9 @@ int 
smc_wr_tx_get_free_slot(struct smc_link *link, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; + u32 idx = link->wr_tx_cnt; struct ib_send_wr *wr_ib; u64 wr_id; - u32 idx; int rc; *wr_buf = NULL; @@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { - rc = wait_event_interruptible_timeout( + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + rc = wait_event_timeout( link->wr_tx_wait, + list_empty(&lgr->list) || /* lgr terminated */ (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - struct smc_link_group *lgr; - - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); smc_lgr_terminate(lgr); return -EPIPE; } - if (rc == -ERESTARTSYS) - return -EINTR; if (idx == link->wr_tx_cnt) return -EPIPE; } @@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); - if (rc) + if (rc) { + struct smc_link_group *lgr = + container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_wr_tx_put_slot(link, priv); + smc_lgr_terminate(lgr); + } return rc; } @@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, +void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data) { struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; + struct smc_wr_rx_hdr *wr_tx; int i; for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) + wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; + if (wr_tx->type != wr_tx_hdr_type) continue; tx_pend = &link->wr_tx_pends[i].priv; if (filter(tx_pend, data)) @@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, } } -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - return true; - } - return false; -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 2acf12b06063..ef0c3494c9cb 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link, int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data); void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, diff --git a/net/socket.c b/net/socket.c index fbfae1ed3ff5..11cc2cd0f37b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -961,9 
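smc_wr_tx_get_free_slot() switches from an interruptible wait to wait_event_timeout() and adds the terminated link group as a wake condition, so a dying group releases waiters instead of leaving them to the timeout (the wake_up() of wr_reg_wait added to smc_lgr_terminate() above is the other half). The -ERESTARTSYS handling disappears with the interruptible variant. Shape only; the smc types are elided and idx starts at the "no slot" sentinel:

/* rc = 0 means the timeout elapsed without any condition becoming true */
rc = wait_event_timeout(link->wr_tx_wait,
			list_empty(&lgr->list) ||	/* group terminated */
			(get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {			/* timed out: escalate to termination */
	smc_lgr_terminate(lgr);
	return -EPIPE;
}
if (idx == link->wr_tx_cnt)	/* woken by termination, not by a slot */
	return -EPIPE;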
+961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * If this ioctl is unknown try to hand it down * to the NIC driver. */ - if (err == -ENOIOCTLCMD) - err = dev_ioctl(net, cmd, argp); + if (err != -ENOIOCTLCMD) + return err; + if (cmd == SIOCGIFCONF) { + struct ifconf ifc; + if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) + return -EFAULT; + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); + rtnl_unlock(); + if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) + err = -EFAULT; + } else { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } return err; } @@ -988,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) sock = file->private_data; sk = sock->sk; net = sock_net(sk); - if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(net, cmd, argp); + if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; } else #ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(net, cmd, argp); + err = wext_handle_ioctl(net, cmd, argp); } else #endif switch (cmd) { @@ -2613,15 +2639,6 @@ out_fs: core_initcall(sock_init); /* early initcall */ -static int __init jit_init(void) -{ -#ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; -#endif - return 0; -} -pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { @@ -2663,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock, return err; } -static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(struct ifreq)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - err = dev_ioctl(net, SIOCGIFNAME, uifr); - if (err) - return err; - - if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) - return -EFAULT; - - return 0; -} - -static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) { struct compat_ifconf ifc32; struct ifconf ifc; - struct ifconf __user *uifc; - struct compat_ifreq __user *ifr32; - struct ifreq __user *ifr; - unsigned int i, j; int err; if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; - memset(&ifc, 0, sizeof(ifc)); - if (ifc32.ifcbuf == 0) { - ifc32.ifc_len = 0; - ifc.ifc_len = 0; - ifc.ifc_req = NULL; - uifc = compat_alloc_user_space(sizeof(struct ifconf)); - } else { - size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * - sizeof(struct ifreq); - uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); - ifc.ifc_len = len; - ifr = ifc.ifc_req = (void __user *)(uifc + 1); - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { - if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr++; - ifr32++; - } - } - if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) - return -EFAULT; + 
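The socket.c rework follows one pattern throughout: copy struct ifreq into the kernel once, run dev_ioctl() on the kernel copy, and copy back only when the command produces output. That is what lets the set_fs(KERNEL_DS) trampolines later in the file disappear. Distilled from the hunks above:

#include <linux/if.h>
#include <linux/netdevice.h>	/* dev_ioctl() with its new signature */
#include <linux/uaccess.h>

static int ifr_ioctl(struct net *net, unsigned int cmd,
		     struct ifreq __user *uifr)
{
	struct ifreq ifr;
	bool need_copyout;
	int err;

	if (copy_from_user(&ifr, uifr, sizeof(ifr)))
		return -EFAULT;
	err = dev_ioctl(net, cmd, &ifr, &need_copyout);
	if (!err && need_copyout &&
	    copy_to_user(uifr, &ifr, sizeof(ifr)))
		return -EFAULT;
	return err;
}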
ifc.ifc_len = ifc32.ifc_len; + ifc.ifc_req = compat_ptr(ifc32.ifcbuf); - err = dev_ioctl(net, SIOCGIFCONF, uifc); + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); + rtnl_unlock(); if (err) return err; - if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) - return -EFAULT; - - ifr = ifc.ifc_req; - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0, j = 0; - i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr32++; - ifr++; - } - - if (ifc32.ifcbuf == 0) { - /* Translate from 64-bit structure multiple to - * a 32-bit one. - */ - i = ifc.ifc_len; - i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); - ifc32.ifc_len = i; - } else { - ifc32.ifc_len = i; - } + ifc32.ifc_len = ifc.ifc_len; if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) return -EFAULT; @@ -2756,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { struct compat_ethtool_rxnfc __user *compat_rxnfc; bool convert_in = false, convert_out = false; - size_t buf_size = ALIGN(sizeof(struct ifreq), 8); - struct ethtool_rxnfc __user *rxnfc; - struct ifreq __user *ifr; + size_t buf_size = 0; + struct ethtool_rxnfc __user *rxnfc = NULL; + struct ifreq ifr; u32 rule_cnt = 0, actual_rule_cnt; u32 ethcmd; u32 data; @@ -2795,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; + rxnfc = compat_alloc_user_space(buf_size); break; } - ifr = compat_alloc_user_space(buf_size); - rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); - - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (put_user(convert_in ? rxnfc : compat_ptr(data), - &ifr->ifr_ifru.ifru_data)) - return -EFAULT; + ifr.ifr_data = convert_in ? 
rxnfc : (void __user *)compat_rxnfc; if (convert_in) { /* We expect there to be holes between fs.m_ext and @@ -2834,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return -EFAULT; } - ret = dev_ioctl(net, SIOCETHTOOL, ifr); + ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); if (ret) return ret; @@ -2875,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { - void __user *uptr; compat_uptr_t uptr32; - struct ifreq __user *uifr; + struct ifreq ifr; + void __user *saved; + int err; - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) return -EFAULT; - uptr = compat_ptr(uptr32); + saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; + ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); - if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) - return -EFAULT; - - return dev_ioctl(net, SIOCWANDEV, uifr); -} - -static int bond_ioctl(struct net *net, unsigned int cmd, - struct compat_ifreq __user *ifr32) -{ - struct ifreq kifr; - mm_segment_t old_fs; - int err; - - switch (cmd) { - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, - (struct ifreq __user __force *) &kifr); - set_fs(old_fs); - - return err; - default: - return -ENOIOCTLCMD; + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + if (!err) { + ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; + if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + err = -EFAULT; } + return err; } /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { - struct ifreq __user *u_ifreq64; - char tmp_buf[IFNAMSIZ]; - void __user *data64; + struct ifreq ifreq; u32 data32; - if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), - IFNAMSIZ)) + if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_data)) return -EFAULT; - data64 = compat_ptr(data32); + ifreq.ifr_data = compat_ptr(data32); - u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - - if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], - IFNAMSIZ)) - return -EFAULT; - if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, u_ifreq64); -} - -static int dev_ifsioc(struct net *net, struct socket *sock, - unsigned int cmd, struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - if 
(copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; + return dev_ioctl(net, cmd, &ifreq, NULL); } static int compat_sioc_ifmap(struct net *net, unsigned int cmd, @@ -2989,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, { struct ifreq ifr; struct compat_ifmap __user *uifmap32; - mm_segment_t old_fs; int err; uifmap32 = &uifr32->ifr_ifru.ifru_map; @@ -3003,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, if (err) return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user __force *)&ifr); - set_fs(old_fs); + err = dev_ioctl(net, cmd, &ifr, NULL); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -3139,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFNAME: - return dev_ifname32(net, argp); case SIOCGIFCONF: - return dev_ifconf(net, argp); + return compat_dev_ifconf(net, argp); case SIOCETHTOOL: return ethtool_ioctl(net, argp); case SIOCWANDEV: @@ -3150,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - return bond_ioctl(net, cmd, argp); case SIOCADDRT: case SIOCDELRT: return routing_ioctl(net, sock, cmd, argp); @@ -3214,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return dev_ifsioc(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: case SIOCATMARK: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCGIFNAME: return sock_do_ioctl(net, sock, cmd, arg); } @@ -3374,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); -int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, cmd, arg); - set_fs(oldfs); - - return err; -} -EXPORT_SYMBOL(kernel_sock_ioctl); - int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); diff --git a/net/tipc/group.c b/net/tipc/group.c index 497ee34bfab9..122162a31816 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -93,26 +93,21 @@ struct tipc_group { u16 max_active; u16 bc_snd_nxt; u16 bc_ackers; + bool *open; bool loopback; bool events; - bool open; }; static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); -bool tipc_group_is_open(struct tipc_group *grp) -{ - return grp->open; -} - static void tipc_group_open(struct tipc_member *m, bool *wakeup) { *wakeup = false; if (list_empty(&m->small_win)) return; list_del_init(&m->small_win); - m->group->open = true; + *m->group->open = true; *wakeup = true; } @@ -170,7 +165,8 @@ int tipc_group_size(struct tipc_group *grp) } struct tipc_group *tipc_group_create(struct net *net, u32 portid, - struct tipc_group_req *mreq) + struct tipc_group_req *mreq, + bool *group_is_open) { u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; bool global = mreq->scope != TIPC_NODE_SCOPE; @@ -192,6 +188,7 @@ struct tipc_group 
*tipc_group_create(struct net *net, u32 portid, grp->scope = mreq->scope; grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; + grp->open = group_is_open; filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, filter, &grp->subid)) @@ -430,7 +427,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, if (m->window >= len) return false; - grp->open = false; + *grp->open = false; /* If not fully advertised, do it now to prevent mutual blocking */ adv = m->advertised; @@ -453,7 +450,7 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) /* If prev bcast was replicast, reject until all receivers have acked */ if (grp->bc_ackers) { - grp->open = false; + *grp->open = false; return true; } if (list_empty(&grp->small_win)) @@ -800,7 +797,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (--grp->bc_ackers) return; list_del_init(&m->small_win); - m->group->open = true; + *m->group->open = true; *usr_wakeup = true; tipc_group_update_member(m, 0); return; diff --git a/net/tipc/group.h b/net/tipc/group.h index f4a596ed9848..5996af6e9f1d 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -43,7 +43,8 @@ struct tipc_member; struct tipc_msg; struct tipc_group *tipc_group_create(struct net *net, u32 portid, - struct tipc_group_req *mreq); + struct tipc_group_req *mreq, + bool *group_is_open); void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); void tipc_group_delete(struct net *net, struct tipc_group *grp); void tipc_group_add_member(struct tipc_group *grp, u32 node, @@ -67,7 +68,6 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int len, struct tipc_member **m); bool tipc_group_bc_cong(struct tipc_group *grp, int len); -bool tipc_group_is_open(struct tipc_group *grp); void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d799e50ff722..473a096b6fba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -116,6 +116,7 @@ struct tipc_sock { struct tipc_mc_method mc_method; struct rcu_head rcu; struct tipc_group *group; + bool group_is_open; }; static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -715,7 +716,6 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp; u32 revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); @@ -736,8 +736,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - grp = tsk->group; - if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt) + if (tsk->group_is_open && !tsk->cong_link_cnt) revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; @@ -2758,7 +2757,7 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) return -EINVAL; if (grp) return -EACCES; - grp = tipc_group_create(net, tsk->portid, mreq); + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); if (!grp) return -ENOMEM; tsk->group = grp; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e07ee3ae0023..736719c8314e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -367,8 +367,10 @@ 
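The tipc change replaces the tipc_group_is_open() accessor with a bool that lives in the socket and that the group updates through a pointer; tipc_poll() then reads tsk->group_is_open directly and never dereferences tsk->group at all, presumably so poll cannot chase a group pointer mid-teardown. A userspace sketch of the ownership arrangement (names hypothetical):

#include <stdbool.h>
#include <stdlib.h>

struct group {
	bool *open;		/* points into the owning socket */
};

struct gsock {
	bool group_is_open;	/* readable without touching ->grp */
	struct group *grp;
};

static struct group *group_create(bool *owner_flag)
{
	struct group *g = calloc(1, sizeof(*g));

	if (g)
		g->open = owner_flag;	/* group writes, socket reads */
	return g;
}

static void group_block(struct group *g)   { *g->open = false; }
static void group_unblock(struct group *g) { *g->open = true; }

/* poll-side check: no group pointer dereference needed */
static bool can_send(const struct gsock *s) { return s->group_is_open; }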
static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, crypto_info = &ctx->crypto_send; /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; goto out; + } rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); if (rc) { @@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, case TLS_CIPHER_AES_GCM_128: { if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { rc = -EINVAL; - goto out; + goto err_crypto_info; } rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), optlen - sizeof(*crypto_info)); @@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, } default: rc = -EINVAL; - goto out; + goto err_crypto_info; } /* currently SW is default, we will have ethtool in future */ @@ -454,6 +456,15 @@ static int tls_init(struct sock *sk) struct tls_context *ctx; int rc = 0; + /* The TLS ulp is currently supported only for TCP sockets + * in ESTABLISHED state. + * Supporting sockets in LISTEN state will require us + * to modify the accept implementation to clone rather then + * share the ulp context. + */ + if (sk->sk_state != TCP_ESTABLISHED) + return -ENOTSUPP; + /* allocate tls context */ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9773571b6a34..0a9b72fbd761 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -577,6 +577,8 @@ alloc_payload: get_page(page); sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; sg_set_page(sg, page, copy, offset); + sg_unmark_end(sg); + ctx->sg_plaintext_num_elem++; sk_mem_charge(sk, copy); @@ -681,18 +683,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) } default: rc = -EINVAL; - goto out; + goto free_priv; } ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; ctx->tag_size = tag_size; ctx->overhead_size = ctx->prepend_size + ctx->tag_size; ctx->iv_size = iv_size; - ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - GFP_KERNEL); + ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); if (!ctx->iv) { rc = -ENOMEM; - goto out; + goto free_priv; } memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); @@ -740,7 +741,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); if (!rc) - goto out; + return 0; free_aead: crypto_free_aead(sw_ctx->aead_send); @@ -751,6 +752,9 @@ free_rec_seq: free_iv: kfree(ctx->iv); ctx->iv = NULL; +free_priv: + kfree(ctx->priv_ctx); + ctx->priv_ctx = NULL; out: return rc; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b48eb6d104c9..ab0c687d0c44 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9835,7 +9835,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, */ if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && rdev->ops->get_station) { - struct station_info sinfo; + struct station_info sinfo = {}; u8 *mac_addr; mac_addr = wdev->current_bss->pub.bssid; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7ca04a7de85a..05186a47878f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; 
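Besides the new -EBUSY and the TCP_ESTABLISHED guard in tls_init(), the tls_set_sw_offload() fixes are a textbook unwind chain: each allocation gets a label, later failures jump to the label that frees everything acquired so far, and success returns before reaching any of them (previously some error paths jumped to out and leaked ctx->priv_ctx). The general structure, as a userspace toy:

#include <errno.h>
#include <stdlib.h>

struct ctx { void *priv; void *iv; };

int setup(struct ctx *c)
{
	int rc;

	c->priv = malloc(64);
	if (!c->priv)
		return -ENOMEM;		/* nothing to unwind yet */

	c->iv = malloc(16);
	if (!c->iv) {
		rc = -ENOMEM;
		goto free_priv;		/* unwind only what exists */
	}

	return 0;			/* success skips the chain */

free_priv:
	free(c->priv);
	c->priv = NULL;
	return rc;
}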
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - /* we are under RTNL - globally locked - so can use a static struct */ - static struct station_info sinfo; + struct station_info sinfo = {}; u8 addr[ETH_ALEN]; int err; diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6cdb054484d6..9efbfc753347 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev, } -int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, - void __user *arg) +int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct iw_request_info info = { .cmd = cmd, .flags = 0 }; + struct iwreq iwr; int ret; - ret = wext_ioctl_dispatch(net, iwr, cmd, &info, + if (copy_from_user(&iwr, arg, sizeof(iwr))) + return -EFAULT; + + iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + + ret = wext_ioctl_dispatch(net, &iwr, cmd, &info, ioctl_standard_call, ioctl_private_call); if (ret >= 0 && IW_IS_GET(cmd) && - copy_to_user(arg, iwr, sizeof(struct iwreq))) + copy_to_user(arg, &iwr, sizeof(struct iwreq))) return -EFAULT; return ret; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 75982506617b..92b4648e75ca 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -184,6 +184,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { + xso->dev = NULL; dev_put(dev); return err; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 20b1e414dbee..54e21f19d722 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -317,7 +317,7 @@ retry: if (!type && try_load) { request_module("xfrm-offload-%d-%d", family, proto); - try_load = 0; + try_load = false; goto retry; } @@ -2279,8 +2279,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) goto error; } - x->km.state = XFRM_STATE_VALID; - error: return err; } @@ -2289,7 +2287,13 @@ EXPORT_SYMBOL(__xfrm_init_state); int xfrm_init_state(struct xfrm_state *x) { - return __xfrm_init_state(x, true, false); + int err; + + err = __xfrm_init_state(x, true, false); + if (!err) + x->km.state = XFRM_STATE_VALID; + + return err; } EXPORT_SYMBOL(xfrm_init_state); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index bdb48e5dba04..7f52b8eb177d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } - if (attrs[XFRMA_OFFLOAD_DEV]) { - err = xfrm_dev_state_add(net, x, - nla_data(attrs[XFRMA_OFFLOAD_DEV])); - if (err) - goto error; - } - if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; @@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* override default values from above */ xfrm_update_ae_params(x, attrs, 0); + /* configure the hardware if offload is requested */ + if (attrs[XFRMA_OFFLOAD_DEV]) { + err = xfrm_dev_state_add(net, x, + nla_data(attrs[XFRMA_OFFLOAD_DEV])); + if (err) + goto error; + } + return x; error: @@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } + if (x->km.state == XFRM_STATE_VOID) + x->km.state = XFRM_STATE_VALID; + c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c index 12e1024069c2..0c12048ac79f 100644 --- 
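Two small hardening patterns close out the wireless hunks: on-stack structs that are later copied out get a zero initializer so unset fields cannot leak stack contents (struct station_info sinfo = {}), and the struct iwreq copied in from userspace gets its name field explicitly NUL-terminated before use. Combined in one illustrative toy:

#include <string.h>

struct report { int flags; char name[16]; };

static void fill(struct report *out, const char *user_name)
{
	struct report r = { 0 };	/* all members zeroed before copy-out */

	/* a name taken from userspace may lack a terminator */
	strncpy(r.name, user_name, sizeof(r.name));
	r.name[sizeof(r.name) - 1] = '\0';
	*out = r;
}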
a/samples/bpf/xdp2skb_meta_kern.c +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -35,15 +35,17 @@ int _xdp_mark(struct xdp_md *ctx) void *data, *data_end; int ret; - /* Reserve space in-front data pointer for our meta info. + /* Reserve space in-front of data pointer for our meta info. * (Notice drivers not supporting data_meta will fail here!) */ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); if (ret < 0) return XDP_ABORTED; - /* For some unknown reason, these ctx pointers must be read - * after bpf_xdp_adjust_meta, else verifier will reject prog. + /* Notice: Kernel-side verifier requires that loading of + * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(), + * as pkt-data pointers are invalidated. Helpers that require + * this are determined/marked by bpf_helper_changes_pkt_data() */ data = (void *)(unsigned long)ctx->data; diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index c969141bfa8b..211db8ded0de 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -1,6 +1,7 @@ -/* XDP monitor tool, based on tracepoints +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. * - * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * XDP monitor tool, based on tracepoints */ #include <uapi/linux/bpf.h> #include "bpf_helpers.h" @@ -118,3 +119,92 @@ int trace_xdp_exception(struct xdp_exception_ctx *ctx) return 0; } + +/* Common stats data record shared with _user.c */ +struct datarec { + u64 processed; + u64 dropped; + u64 info; +}; +#define MAX_CPUS 64 + +struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = MAX_CPUS, +}; + +struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_enqueue_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int to_cpu; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_enqueue") +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) +{ + u32 to_cpu = ctx->to_cpu; + struct datarec *rec; + + if (to_cpu >= MAX_CPUS) + return 1; + + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + if (ctx->processed > 0) + rec->info += 1; + + return 0; +} + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct cpumap_kthread_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; signed:1; + u32 act; // offset:12; size:4; signed:0; + int cpu; // offset:16; size:4; signed:1; + unsigned int drops; // offset:20; size:4; signed:0; + unsigned int processed; // offset:24; size:4; signed:0; + int sched; // offset:28; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_cpumap_kthread") +int 
trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->processed; + rec->dropped += ctx->drops; + + /* Count times kthread yielded CPU via schedule call */ + if (ctx->sched) + rec->info++; + + return 0; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index eaba165b3549..eec14520d513 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -1,4 +1,5 @@ -/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ static const char *__doc__= "XDP monitor tool, based on tracepoints\n" @@ -40,6 +41,9 @@ static const struct option long_options[] = { {0, 0, NULL, 0 } }; +/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ +#define EXIT_FAIL_MEM 5 + static void usage(char *argv[]) { int i; @@ -108,23 +112,93 @@ static const char *action2str(int action) return NULL; } +/* Common stats data record shared with _kern.c */ +struct datarec { + __u64 processed; + __u64 dropped; + __u64 info; +}; +#define MAX_CPUS 64 + +/* Userspace structs for collection of stats from maps */ struct record { - __u64 counter; __u64 timestamp; + struct datarec total; + struct datarec *cpu; +}; +struct u64rec { + __u64 processed; +}; +struct record_u64 { + /* record for _kern side __u64 values */ + __u64 timestamp; + struct u64rec total; + struct u64rec *cpu; }; struct stats_record { - struct record xdp_redir[REDIR_RES_MAX]; - struct record xdp_exception[XDP_ACTION_MAX]; + struct record_u64 xdp_redirect[REDIR_RES_MAX]; + struct record_u64 xdp_exception[XDP_ACTION_MAX]; + struct record xdp_cpumap_kthread; + struct record xdp_cpumap_enqueue[MAX_CPUS]; }; -static void stats_print_headers(bool err_only) +static bool map_collect_record(int fd, __u32 key, struct record *rec) { - if (err_only) - printf("\n%s\n", __doc_err_only__); + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct datarec values[nr_cpus]; + __u64 sum_processed = 0; + __u64 sum_dropped = 0; + __u64 sum_info = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return false; + } + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); - printf("%-14s %-11s %-10s %-18s %-9s\n", - "ACTION", "result", "pps ", "pps-human-readable", "measure-period"); + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = values[i].processed; + sum_processed += values[i].processed; + rec->cpu[i].dropped = values[i].dropped; + sum_dropped += values[i].dropped; + rec->cpu[i].info = values[i].info; + sum_info += values[i].info; + } + rec->total.processed = sum_processed; + rec->total.dropped = sum_dropped; + rec->total.info = sum_info; + return true; +} + +static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct u64rec values[nr_cpus]; + __u64 sum_total = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return false; + } + /* Get time as close as possible to reading map contents */ + 
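The tracepoint context structs in xdp_monitor_kern.c are hand-written mirrors of the format files their comments cite: the first 8 bytes are inaccessible to BPF, and every following field must match the offset/size/signedness lines in /sys/kernel/debug/tracing/events/xdp/.../format. One quick way to sanity-check such a mirror at compile time:

#include <stddef.h>
#include <stdint.h>

struct cpumap_kthread_ctx {
	uint64_t __pad;		/* first 8 bytes not accessible from BPF */
	int map_id;		/* offset:8  size:4 */
	uint32_t act;		/* offset:12 size:4 */
	int cpu;		/* offset:16 size:4 */
	unsigned int drops;	/* offset:20 size:4 */
	unsigned int processed;	/* offset:24 size:4 */
	int sched;		/* offset:28 size:4 */
};

_Static_assert(offsetof(struct cpumap_kthread_ctx, drops) == 20,
	       "layout must match the tracepoint format file");
_Static_assert(offsetof(struct cpumap_kthread_ctx, sched) == 28,
	       "layout must match the tracepoint format file");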
rec->timestamp = gettime(); + + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = values[i].processed; + sum_total += values[i].processed; + } + rec->total.processed = sum_total; + return true; } static double calc_period(struct record *r, struct record *p) @@ -139,77 +213,203 @@ static double calc_period(struct record *r, struct record *p) return period_; } -static double calc_pps(struct record *r, struct record *p, double period) +static double calc_period_u64(struct record_u64 *r, struct record_u64 *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double) period / NANOSEC_PER_SEC); + + return period_; +} + +static double calc_pps(struct datarec *r, struct datarec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->processed - p->processed; + pps = packets / period; + } + return pps; +} + +static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->processed - p->processed; + pps = packets / period; + } + return pps; +} + +static double calc_drop(struct datarec *r, struct datarec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->dropped - p->dropped; + pps = packets / period; + } + return pps; +} + +static double calc_info(struct datarec *r, struct datarec *p, double period) { __u64 packets = 0; double pps = 0; if (period > 0) { - packets = r->counter - p->counter; + packets = r->info - p->info; pps = packets / period; } return pps; } -static void stats_print(struct stats_record *rec, - struct stats_record *prev, +static void stats_print(struct stats_record *stats_rec, + struct stats_record *stats_prev, bool err_only) { - double period = 0, pps = 0; - struct record *r, *p; - int i = 0; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int rec_i = 0, i, to_cpu; + double t = 0, pps = 0; - char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n"; + /* Header */ + printf("%-15s %-7s %-12s %-12s %-9s\n", + "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info"); /* tracepoint: xdp:xdp_redirect_* */ if (err_only) - i = REDIR_ERROR; - - for (; i < REDIR_RES_MAX; i++) { - r = &rec->xdp_redir[i]; - p = &prev->xdp_redir[i]; - - if (p->timestamp) { - period = calc_period(r, p); - pps = calc_pps(r, p, period); + rec_i = REDIR_ERROR; + + for (; rec_i < REDIR_RES_MAX; rec_i++) { + struct record_u64 *rec, *prev; + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; + + rec = &stats_rec->xdp_redirect[rec_i]; + prev = &stats_prev->xdp_redirect[rec_i]; + t = calc_period_u64(rec, prev); + + for (i = 0; i < nr_cpus; i++) { + struct u64rec *r = &rec->cpu[i]; + struct u64rec *p = &prev->cpu[i]; + + pps = calc_pps_u64(r, p, t); + if (pps > 0) + printf(fmt1, "XDP_REDIRECT", i, + rec_i ? 0.0: pps, rec_i ? pps : 0.0, + err2str(rec_i)); } - printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period); + pps = calc_pps_u64(&rec->total, &prev->total, t); + printf(fmt2, "XDP_REDIRECT", "total", + rec_i ? 0.0: pps, rec_i ? 
pps : 0.0, err2str(rec_i));
 	}
 
 	/* tracepoint: xdp:xdp_exception */
-	for (i = 0; i < XDP_ACTION_MAX; i++) {
-		r = &rec->xdp_exception[i];
-		p = &prev->xdp_exception[i];
-		if (p->timestamp) {
-			period = calc_period(r, p);
-			pps = calc_pps(r, p, period);
+	for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+		struct record_u64 *rec, *prev;
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+		rec = &stats_rec->xdp_exception[rec_i];
+		prev = &stats_prev->xdp_exception[rec_i];
+		t = calc_period_u64(rec, prev);
+
+		for (i = 0; i < nr_cpus; i++) {
+			struct u64rec *r = &rec->cpu[i];
+			struct u64rec *p = &prev->cpu[i];
+
+			pps = calc_pps_u64(r, p, t);
+			if (pps > 0)
+				printf(fmt1, "Exception", i,
+				       0.0, pps, action2str(rec_i));
 		}
+		pps = calc_pps_u64(&rec->total, &prev->total, t);
 		if (pps > 0)
-			printf(fmt, action2str(i), "Exception",
-			       pps, pps, period);
+			printf(fmt2, "Exception", "total",
+			       0.0, pps, action2str(rec_i));
 	}
 
-	printf("\n");
-}
-
-static __u64 get_key32_value64_percpu(int fd, __u32 key)
-{
-	/* For percpu maps, userspace gets a value per possible CPU */
-	unsigned int nr_cpus = bpf_num_possible_cpus();
-	__u64 values[nr_cpus];
-	__u64 sum = 0;
-	int i;
-
-	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
-		fprintf(stderr,
-			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
-		return 0;
+	/* cpumap enqueue stats */
+	for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+		char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+		char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+		struct record *rec, *prev;
+		char *info_str = "";
+		double drop, info;
+
+		rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
+		prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps = calc_pps(r, p, t);
+			drop = calc_drop(r, p, t);
+			info = calc_info(r, p, t);
+			if (info > 0) {
+				info_str = "bulk-average";
+				info = pps / info; /* calc average bulk size */
+			}
+			if (pps > 0)
+				printf(fmt1, "cpumap-enqueue",
+				       i, to_cpu, pps, drop, info, info_str);
+		}
+		pps = calc_pps(&rec->total, &prev->total, t);
+		if (pps > 0) {
+			drop = calc_drop(&rec->total, &prev->total, t);
+			info = calc_info(&rec->total, &prev->total, t);
+			if (info > 0) {
+				info_str = "bulk-average";
+				info = pps / info; /* calc average bulk size */
+			}
+			printf(fmt2, "cpumap-enqueue",
+			       "sum", to_cpu, pps, drop, info, info_str);
+		}
 	}
 
-	/* Sum values from each CPU */
-	for (i = 0; i < nr_cpus; i++) {
-		sum += values[i];
+	/* cpumap kthread stats */
+	{
+		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
+		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
+		struct record *rec, *prev;
+		double drop, info;
+		char *i_str = "";
+
+		rec = &stats_rec->xdp_cpumap_kthread;
+		prev = &stats_prev->xdp_cpumap_kthread;
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps = calc_pps(r, p, t);
+			drop = calc_drop(r, p, t);
+			info = calc_info(r, p, t);
+			if (info > 0)
+				i_str = "sched";
+			if (pps > 0)
+				printf(fmt1, "cpumap-kthread",
+				       i, pps, drop, info, i_str);
+		}
+		pps = calc_pps(&rec->total, &prev->total, t);
+		drop = calc_drop(&rec->total, &prev->total, t);
+		info = calc_info(&rec->total, &prev->total, t);
+		if (info > 0)
+			i_str = "sched-sum";
+		printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
 	}
-	return sum;
+
+	printf("\n");
 }
 
 static bool stats_collect(struct stats_record *rec)
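Note (illustration, not part of the original patch): the collection helpers above rely on the per-cpu map contract: a single bpf_map_lookup_elem() on a BPF_MAP_TYPE_PERCPU_ARRAY fills one value slot per possible CPU, which userspace then sums. A minimal self-contained sketch of that pattern, assuming libbpf's bpf_map_lookup_elem() (header path varies by tree) and the struct datarec layout from this patch:

    #include <linux/types.h>
    #include <bpf/bpf.h>      /* bpf_map_lookup_elem(); path varies by tree */

    struct datarec { __u64 processed; __u64 dropped; __u64 info; };

    /* Sum the 'processed' counter of one per-cpu map slot across all
     * possible CPUs -- the same collapse map_collect_record() performs. */
    static __u64 sum_slot_processed(int map_fd, __u32 key,
                                    unsigned int nr_cpus)
    {
            struct datarec values[nr_cpus]; /* one entry per possible CPU */
            __u64 sum = 0;
            unsigned int i;

            if (bpf_map_lookup_elem(map_fd, &key, values) != 0)
                    return 0; /* lookup failed; real code reports the error */
            for (i = 0; i < nr_cpus; i++)
                    sum += values[i].processed;
            return sum;
    }
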
@@ -222,25 +422,109 @@ static bool stats_collect(struct stats_record *rec)
 	 */
 	fd = map_data[0].fd; /* map0: redirect_err_cnt */
-	for (i = 0; i < REDIR_RES_MAX; i++) {
-		rec->xdp_redir[i].timestamp = gettime();
-		rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
-	}
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
 
 	fd = map_data[1].fd; /* map1: exception_cnt */
 	for (i = 0; i < XDP_ACTION_MAX; i++) {
-		rec->xdp_exception[i].timestamp = gettime();
-		rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i);
+		map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
 	}
 
+	fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+	for (i = 0; i < MAX_CPUS; i++)
+		map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
+
+	fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+	map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+
 	return true;
 }
 
+static void *alloc_rec_per_cpu(int record_size)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	void *array;
+	size_t size;
+
+	size = record_size * nr_cpus;
+	array = malloc(size);
+	if (!array) {
+		fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+		exit(EXIT_FAIL_MEM);
+	}
+	memset(array, 0, size);
+	return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+	struct stats_record *rec;
+	int rec_sz;
+	int i;
+
+	/* Alloc main stats_record structure */
+	rec = malloc(sizeof(*rec));
+	if (!rec) {
+		fprintf(stderr, "Mem alloc error\n");
+		exit(EXIT_FAIL_MEM);
+	}
+	memset(rec, 0, sizeof(*rec));
+
+	/* Alloc stats stored per CPU for each record */
+	rec_sz = sizeof(struct u64rec);
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	rec_sz = sizeof(struct datarec);
+	rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+
+	for (i = 0; i < MAX_CPUS; i++)
+		rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+	int i;
+
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		free(r->xdp_redirect[i].cpu);
+
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		free(r->xdp_exception[i].cpu);
+
+	free(r->xdp_cpumap_kthread.cpu);
+
+	for (i = 0; i < MAX_CPUS; i++)
+		free(r->xdp_cpumap_enqueue[i].cpu);
+
+	free(r);
+}
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+	struct stats_record *tmp;
+
+	tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
 static void stats_poll(int interval, bool err_only)
 {
-	struct stats_record rec, prev;
+	struct stats_record *rec, *prev;
 
-	memset(&rec, 0, sizeof(rec));
+	rec = alloc_stats_record();
+	prev = alloc_stats_record();
+	stats_collect(rec);
+
+	if (err_only)
+		printf("\n%s\n", __doc_err_only__);
 
 	/* Trick to pretty printf with thousands separators use %' */
 	setlocale(LC_NUMERIC, "en_US");
@@ -258,13 +542,15 @@ static void stats_poll(int interval, bool err_only)
 	fflush(stdout);
 
 	while (1) {
-		memcpy(&prev, &rec, sizeof(rec));
-		stats_collect(&rec);
-		stats_print_headers(err_only);
-		stats_print(&rec, &prev, err_only);
+		swap(&prev, &rec);
+		stats_collect(rec);
+		stats_print(rec, prev, err_only);
 		fflush(stdout);
 		sleep(interval);
 	}
+
+	free_stats_record(rec);
+	free_stats_record(prev);
 }
 
 static void print_bpf_prog_info(void)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index cb8997ed0149..47cddf32aeba 100644
--- a/scripts/Makefile.build
+++ 
b/scripts/Makefile.build @@ -265,12 +265,18 @@ else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif +ifdef CONFIG_MODVERSIONS +objtool_o = $(@D)/.tmp_$(@F) +else +objtool_o = $(@) +endif + # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file cmd_objtool = $(if $(patsubst y%,, \ $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ - $(__objtool_obj) $(objtool_args) "$(@)";) + $(__objtool_obj) $(objtool_args) "$(objtool_o)";) objtool_obj = $(if $(patsubst y%,, \ $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ $(__objtool_obj)) @@ -286,16 +292,16 @@ objtool_dep = $(objtool_obj) \ define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ - $(cmd_modversions_c) \ $(cmd_checkdoc) \ $(call echo-cmd,objtool) $(cmd_objtool) \ + $(cmd_modversions_c) \ $(call echo-cmd,record_mcount) $(cmd_record_mcount) endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ - $(cmd_modversions_S) \ - $(call echo-cmd,objtool) $(cmd_objtool) + $(call echo-cmd,objtool) $(cmd_objtool) \ + $(cmd_modversions_S) endef # List module undefined symbols (or empty line if not enabled) diff --git a/scripts/decodecode b/scripts/decodecode index 438120da1361..5ea071099330 100755 --- a/scripts/decodecode +++ b/scripts/decodecode @@ -59,6 +59,14 @@ disas() { ${CROSS_COMPILE}strip $1.o fi + if [ "$ARCH" = "arm64" ]; then + if [ $width -eq 4 ]; then + type=inst + fi + + ${CROSS_COMPILE}strip $1.o + fi + ${CROSS_COMPILE}objdump $OBJDUMPFLAGS -S $1.o | \ grep -v "/tmp\|Disassembly\|\.text\|^$" > $1.dis 2>&1 } diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index 1bf949c43b76..f6ab3ccf698f 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -96,6 +96,8 @@ def get_thread_info(task): thread_info_addr = task.address + ia64_task_size thread_info = thread_info_addr.cast(thread_info_ptr_type) else: + if task.type.fields()[0].type == thread_info_type.get_type(): + return task['thread_info'] thread_info = task['stack'].cast(thread_info_ptr_type) return thread_info.dereference() diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 30044bc4f389..58c2bab4ef6e 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -172,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, { char *ptr, *pptr, *tmp; off_t off = 0; - int ret, flen, proglen, pass, ulen = 0; + unsigned int proglen; + int ret, flen, pass, ulen = 0; regmatch_t pmatch[1]; unsigned long base; regex_t regex; @@ -199,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); - ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", + ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx", &flen, &proglen, &pass, &base); if (ret != 4) { regfree(®ex); @@ -239,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } assert(ulen == proglen); - printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", + printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n", proglen, pass, flen); printf("%lx + <x>:\n", base); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6601c95a9258..0b482c0070e0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ 
-34,6 +34,7 @@ /* Author: Jakub Kicinski <[email protected]> */ #include <errno.h> +#include <fcntl.h> #include <fts.h> #include <libgen.h> #include <mntent.h> @@ -433,6 +434,77 @@ ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) return if_indextoname(ifindex, buf); } +static int read_sysfs_hex_int(char *path) +{ + char vendor_id_buf[8]; + int len; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + p_err("Can't open %s: %s", path, strerror(errno)); + return -1; + } + + len = read(fd, vendor_id_buf, sizeof(vendor_id_buf)); + close(fd); + if (len < 0) { + p_err("Can't read %s: %s", path, strerror(errno)); + return -1; + } + if (len >= (int)sizeof(vendor_id_buf)) { + p_err("Value in %s too long", path); + return -1; + } + + vendor_id_buf[len] = 0; + + return strtol(vendor_id_buf, NULL, 0); +} + +static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) +{ + char full_path[64]; + + snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s", + devname, entry_name); + + return read_sysfs_hex_int(full_path); +} + +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) +{ + char devname[IF_NAMESIZE]; + int vendor_id; + int device_id; + + if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { + p_err("Can't get net device name for ifindex %d: %s", ifindex, + strerror(errno)); + return NULL; + } + + vendor_id = read_sysfs_netdev_hex_int(devname, "vendor"); + if (vendor_id < 0) { + p_err("Can't get device vendor id for %s", devname); + return NULL; + } + + switch (vendor_id) { + case 0x19ee: + device_id = read_sysfs_netdev_hex_int(devname, "device"); + if (device_id != 0x4000 && + device_id != 0x6000 && + device_id != 0x6003) + p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); + return "NFP-6xxx"; + default: + p_err("Can't get bfd arch name for device vendor id 0x%04x", + vendor_id); + return NULL; + } +} + void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) { char name[IF_NAMESIZE]; diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 57d32e8a1391..87439320ef70 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...) return 0; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch) { disassembler_ftype disassemble; struct disassemble_info info; @@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) else init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + + /* Update architecture info for offload. 
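+	 * (Note, added for clarity: an offloaded program's JITed image is
+	 * not host machine code, so the disassembler must be re-targeted;
+	 * the arch string comes from ifindex_to_bfd_name_ns(), e.g.
+	 * "NFP-6xxx".)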
+	 */
+	if (arch) {
+		const bfd_arch_info_type *inf = bfd_scan_arch(arch);
+
+		if (inf) {
+			bfdf->arch_info = inf;
+		} else {
+			p_err("No libbfd support for %s", arch);
+			return;
+		}
+	}
+
 	info.arch = bfd_get_arch(bfdf);
 	info.mach = bfd_get_mach(bfdf);
 	info.buffer = image;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 65b526fe6e7e..b8e9584d6246 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -121,7 +121,10 @@ int do_cgroup(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes);
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+		       const char *arch);
 void print_hex_data_json(uint8_t *data, size_t len);
 
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+
 #endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 8d7db9d6b9cd..f95fa67bb498 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -66,6 +66,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_HASH_OF_MAPS]	= "hash_of_maps",
 	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
 	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
+	[BPF_MAP_TYPE_CPUMAP]		= "cpumap",
 };
 
 static unsigned int get_possible_cpus(void)
@@ -428,6 +429,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 	jsonw_name(json_wtr, "flags");
 	jsonw_printf(json_wtr, "%#x", info->map_flags);
+
+	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
 	jsonw_uint_field(json_wtr, "bytes_key", info->key_size);
 	jsonw_uint_field(json_wtr, "bytes_value", info->value_size);
 	jsonw_uint_field(json_wtr, "max_entries", info->max_entries);
@@ -469,7 +473,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 	if (*info->name)
 		printf("name %s ", info->name);
 
-	printf("flags 0x%x\n", info->map_flags);
+	printf("flags 0x%x", info->map_flags);
+	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+	printf("\n");
 	printf("\tkey %uB value %uB max_entries %u",
 	       info->key_size, info->value_size, info->max_entries);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 099e21cf1b5c..e8e2baaf93c2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -776,7 +776,17 @@ static int do_dump(int argc, char **argv)
 		}
 	} else {
 		if (member_len == &info.jited_prog_len) {
-			disasm_print_insn(buf, *member_len, opcodes);
+			const char *name = NULL;
+
+			if (info.ifindex) {
+				name = ifindex_to_bfd_name_ns(info.ifindex,
+							      info.netns_dev,
+							      info.netns_ino);
+				if (!name)
+					goto err_free;
+			}
+
+			disasm_print_insn(buf, *member_len, opcodes, name);
 		} else {
 			kernel_syms_load(&dd);
 			if (json_output)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69f96af4a569..af1f49ad8b88 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -900,6 +900,9 @@ struct xdp_md {
 	__u32 data;
 	__u32 data_end;
 	__u32 data_meta;
+	/* Below access go through struct xdp_rxq_info */
+	__u32 ingress_ifindex; /* rxq->dev->ifindex */
+	__u32 rx_queue_index;  /* rxq->queue_index  */
 };
 
 enum sk_action {
@@ -935,6 +938,9 @@ struct bpf_map_info {
 	__u32 max_entries;
 	__u32 map_flags;
 	char  name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -956,6 +962,12 @@ struct bpf_sock_ops {
 	__u32 local_ip6[4];	/* Stored in network byte order */
 	__u32 remote_port;	/* Stored in network byte order
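				 * (note, added for clarity: unlike local_port
				 * just below, which is host byte order)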
*/ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ }; /* List of known BPF sock_ops operators. @@ -1010,7 +1022,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 24460155c82c..c1c338661699 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -26,6 +26,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <errno.h> #include "elf.h" #include "warn.h" @@ -358,7 +359,8 @@ struct elf *elf_open(const char *name, int flags) elf->fd = open(name, flags); if (elf->fd == -1) { - perror("open"); + fprintf(stderr, "objtool: Can't open '%s': %s\n", + name, strerror(errno)); goto err; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 541d9d7fad5a..1e09d77f1948 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -3,3 +3,10 @@ test_maps test_lru_map test_lpm_map test_tag +FEATURE-DUMP.libbpf +fixdep +test_align +test_dev_cgroup +test_progs +test_verifier_log +feature diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a8aa7e251c8e..3a44b655d852 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ - test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + sample_map_ret0.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c new file mode 100644 index 000000000000..0756303676ac --- /dev/null +++ b/tools/testing/selftests/bpf/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. 
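+ * (Note, added for clarity: it looks up both maps above, so loading it
+ * exercises both the map-offload and map-fixup control paths that
+ * test_offload.py wants to hit.)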
*/ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index f61480641b6e..081510853c6d 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -521,6 +521,126 @@ static void test_lpm_delete(void) close(map_fd); } +static void test_lpm_get_next_key(void) +{ + struct bpf_lpm_trie_key *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; + int map_fd; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + next_key_p = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && + errno == ENOENT); + + /* get and verify the first key, get the second one should fail. */ + key_p->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should get the first one in post order. */ + key_p->prefixlen = 8; + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + /* add one more element (total two) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total three) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total four) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", 
key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should return the first one in post order */ + key_p->prefixlen = 22; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -545,6 +665,8 @@ int main(void) test_lpm_delete(); + test_lpm_get_next_key(); + printf("test_lpm: OK\n"); return 0; } diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index e3c750f17cb8..ae3eea3ab820 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -20,11 +20,13 @@ import os import pprint import random import string +import struct import subprocess import time logfile = None log_level = 1 +skip_extack = False bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) pp = pprint.PrettyPrinter() devs = [] # devices we created for clean up @@ -131,7 +133,7 @@ def rm(f): if f in files: files.remove(f) -def tool(name, args, flags, JSON=True, ns="", fail=True): +def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): params = "" if JSON: params += "%s " % (flags["json"]) @@ -139,9 +141,20 @@ def tool(name, args, flags, JSON=True, ns="", fail=True): if ns != "": ns = "ip netns exec %s " % (ns) - ret, out = cmd(ns + name + " " + params + args, fail=fail) - if JSON and len(out.strip()) != 0: - return ret, json.loads(out) + if include_stderr: + ret, stdout, stderr = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=True) + else: + ret, stdout = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=False) + + if JSON and len(stdout.strip()) != 0: + out = json.loads(stdout) + else: + out = stdout + + if include_stderr: + return ret, out, stderr else: return ret, out @@ -156,6 +169,14 @@ def bpftool_prog_list(expected=None, ns=""): (len(progs), expected)) return progs +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + def bpftool_prog_list_wait(expected=0, n_retry=20): for i in range(n_retry): nprogs = len(bpftool_prog_list()) @@ -164,13 +185,23 @@ def 
bpftool_prog_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) -def ip(args, force=False, JSON=True, ns="", fail=True): +def bpftool_map_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nmaps = len(bpftool_map_list()) + if nmaps == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + +def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False): if force: args = "-force " + args - return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail) + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) -def tc(args, JSON=True, ns="", fail=True): - return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) +def tc(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) def ethtool(dev, opt, args, fail=True): return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) @@ -193,6 +224,26 @@ def mknetns(n_retry=10): return name return None +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = "Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + class DebugfsDir: """ Class for accessing DebugFS directories as a dictionary. 
@@ -311,13 +362,19 @@ class NetdevSim: return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) - def set_xdp(self, bpf, mode, force=False, fail=True): + def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False, + fail=True, include_stderr=False): + if verbose: + bpf += " verbose" return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), - force=force, fail=fail) + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) - def unset_xdp(self, mode, force=False, fail=True): + def unset_xdp(self, mode, force=False, JSON=True, + fail=True, include_stderr=False): return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), - force=force, fail=fail) + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) def ip_link_show(self, xdp): _, link = ip("link show dev %s" % (self['ifname'])) @@ -372,17 +429,39 @@ class NetdevSim: (len(filters), expected)) return filters - def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False, - fail=True): + def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, + cls="", params="", + fail=True, include_stderr=False): + spec = "" + if prio is not None: + spec += " prio %d" % (prio) + if handle: + spec += " handle %s" % (handle) + + return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ + .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, + cls=cls, params=params), + fail=fail, include_stderr=include_stderr) + + def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, + da=False, verbose=False, + skip_sw=False, skip_hw=False, + fail=True, include_stderr=False): + cls = "bpf " + bpf + params = "" if da: params += " da" + if verbose: + params += " verbose" if skip_sw: params += " skip_sw" if skip_hw: params += " skip_hw" - return tc("filter add dev %s ingress bpf %s %s" % - (self['ifname'], bpf, params), fail=fail) + + return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, + params=params, + fail=fail, include_stderr=include_stderr) def set_ethtool_tc_offloads(self, enable, fail=True): args = "hw-tc-offload %s" % ("on" if enable else "off") @@ -390,12 +469,16 @@ class NetdevSim: ################################################################################ def clean_up(): + global files, netns, devs + for dev in devs: dev.remove() for f in files: cmd("rm -f %s" % (f)) for ns in netns: cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] def pin_prog(file_name, idx=0): progs = bpftool_prog_list(expected=(idx + 1)) @@ -405,16 +488,31 @@ def pin_prog(file_name, idx=0): return file_name, bpf_pinned(file_name) -def check_dev_info(other_ns, ns, pin_file=None, removed=False): - if removed: - bpftool_prog_list(expected=0) - ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) - fail(ret == 0, "Showing prog with removed device did not fail") - fail(err["error"].find("No such device") == -1, - "Showing prog with removed device expected ENODEV, error is %s" % - (err["error"])) - return - progs = bpftool_prog_list(expected=int(not removed), ns=ns) +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list(expected=expected) + m = maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + 
fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + + bpftool_map_list(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) prog = progs[0] fail("dev" not in prog.keys(), "Device parameters not reported") @@ -423,16 +521,34 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False): fail("ns_dev" not in dev.keys(), "Device parameters not reported") fail("ns_inode" not in dev.keys(), "Device parameters not reported") - if not removed and not other_ns: + if not other_ns: fail("ifname" not in dev.keys(), "Ifname not reported") fail(dev["ifname"] != sim["ifname"], "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) else: fail("ifname" in dev.keys(), "Ifname is reported for other ns") - if removed: - fail(dev["ifindex"] != 0, "Device perameters not zero on removed") - fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") - fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") + + maps = bpftool_map_list(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") + +def check_extack(output, reference, args): + if skip_extack: + return + lines = output.split("\n") + comp = len(lines) >= 2 and lines[1] == reference + fail(not comp, "Missing or incorrect netlink extack message") + +def check_extack_nsim(output, reference, args): + check_extack(output, "Error: netdevsim: " + reference, args) + +def check_verifier_log(output, reference): + lines = output.split("\n") + for l in reversed(lines): + if l == reference: + return + fail(True, "Missing or incorrect message from netdevsim in verifier log") # Parse command line parser = argparse.ArgumentParser() @@ -464,12 +580,20 @@ if out.find("/sys/kernel/debug type debugfs") == -1: cmd("mount -t debugfs none /sys/kernel/debug") # Check samples are compiled -samples = ["sample_ret0.o"] +samples = ["sample_ret0.o", "sample_map_ret0.o"] for s in samples: ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) skip(ret != 0, "sample %s/%s not found, please compile it" % (bpf_test_dir, s)) +# Check if iproute2 is built with libmnl (needed by extack support) +_, _, err = cmd("tc qdisc delete dev lo handle 0", + fail=False, include_stderr=True) +if err.find("Error: Failed to find qdisc with specified handle.") == -1: + print("Warning: no extack message in iproute2 output, libmnl missing?") + log("Warning: no extack message in iproute2 output, libmnl missing?", "") + skip_extack = True + # Check if net namespaces seem to work ns = mknetns() skip(ns is None, "Could not create a net namespace") @@ -501,8 +625,10 @@ try: sim.tc_flush_filters() start_test("Test TC offloads are off by default...") - ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "TC filter loaded without enabling TC offloads") + check_extack(err, "Error: TC offload is disabled on net device.", args) sim.wait_for_flush() sim.set_ethtool_tc_offloads(True) @@ -530,13 +656,44 @@ try: 
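# (Note, added for clarity: the extack checks below need iproute2 built
# with libmnl; when it is missing, skip_extack is set during setup above
# and check_extack() silently passes.)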
sim.dfs["bpf_tc_non_bound_accept"] = "N" start_test("Test TC cBPF unbound bytecode doesn't offload...") - ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "TC bytecode loaded for offload") + check_extack_nsim(err, "netdevsim configured to reject unbound programs.", + args) sim.wait_for_flush() + start_test("Test TC replace...") + sim.cls_bpf_add_filter(obj, prio=1, handle=1) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test TC replace bad flags...") + for i in range(3): + for j in range(3): + ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_sw=(j == 1), skip_hw=(j == 2), + fail=False) + fail(bool(ret) != bool(j), + "Software TC incorrect load in replace test, iteration %d" % + (j)) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.tc_flush_filters() + start_test("Test TC offloads work...") - ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) fail(ret != 0, "TC filter did not load with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test TC offload basics...") dfs = sim.dfs_get_bound_progs(expected=1) @@ -612,16 +769,24 @@ try: "Device parameters reported for non-offloaded program") start_test("Test XDP prog replace with bad flags...") - ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) + ret, _, err = sim.set_xdp(obj, "offload", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") - ret, _ = sim.set_xdp(obj, "", force=True, fail=False) + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.set_xdp(obj, "", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) start_test("Test XDP prog remove with bad flags...") - ret, _ = sim.unset_xdp("offload", force=True, fail=False) + ret, _, err = sim.unset_xdp("offload", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Removed program with a bad mode mode") - ret, _ = sim.unset_xdp("", force=True, fail=False) + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") ret, _ = sim.set_mtu(9000, fail=False) @@ -630,18 +795,20 @@ try: sim.unset_xdp("drv") bpftool_prog_list_wait(expected=0) sim.set_mtu(9000) - ret, _ = sim.set_xdp(obj, "drv", fail=False) + ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True) fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + 
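# (note, assumption: netdevsim rejects XDP when the MTU exceeds its
# NSIM_XDP_MAX_MTU limit, 4000 at the time of this patch)
+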
check_extack_nsim(err, "MTU too large w/ XDP enabled.", args) sim.set_mtu(1500) sim.wait_for_flush() start_test("Test XDP offload...") - sim.set_xdp(obj, "offload") + _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) ipl = sim.ip_link_show(xdp=True) link_xdp = ipl["xdp"]["prog"] progs = bpftool_prog_list(expected=1) prog = progs[0] fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test XDP offload is device bound...") dfs = sim.dfs_get_bound_progs(expected=1) @@ -667,25 +834,32 @@ try: sim2.set_xdp(obj, "offload") pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") - ret, _ = sim.set_xdp(pinned, "offload", fail=False) + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a different device accepted") + check_extack_nsim(err, "program bound to different dev.", args) sim2.remove() - ret, _ = sim.set_xdp(pinned, "offload", fail=False) + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) fail(ret == 0, "Pinned program loaded for a removed device accepted") + check_extack_nsim(err, "xdpoffload of non-bound program.", args) rm(pin_file) bpftool_prog_list_wait(expected=0) start_test("Test mixing of TC and XDP...") sim.tc_add_ingress() sim.set_xdp(obj, "offload") - ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "Loading TC when XDP active should fail") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) sim.unset_xdp("offload") sim.wait_for_flush() sim.cls_bpf_add_filter(obj, skip_sw=True) - ret, _ = sim.set_xdp(obj, "offload", fail=False) + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) fail(ret == 0, "Loading XDP when TC active should fail") + check_extack_nsim(err, "TC program is already loaded.", args) start_test("Test binding TC from pinned...") pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") @@ -708,8 +882,10 @@ try: start_test("Test asking for TC offload of two filters...") sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) - ret, _ = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, fail=False) + ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, + fail=False, include_stderr=True) fail(ret == 0, "Managed to offload two TC filters at the same time") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) sim.tc_flush_filters(bound=2, total=2) @@ -739,8 +915,9 @@ try: bpftool_prog_list_wait(expected=0) sim = NetdevSim() - sim.set_ethtool_tc_offloads(True) - sim.set_xdp(obj, "offload") + map_obj = bpf_obj("sample_map_ret0.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON start_test("Test bpftool bound info reporting (own ns)...") check_dev_info(False, "") @@ -757,11 +934,111 @@ try: sim.set_ns("") check_dev_info(False, "") - pin_file, _ = pin_prog("/sys/fs/bpf/tmp") + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) sim.remove() start_test("Test bpftool bound info reporting (removed dev)...") - check_dev_info(True, "", pin_file=pin_file, removed=True) + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = 
NetdevSim() + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete (array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (htab["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + sim.remove() + + sim = NetdevSim() + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + sim.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + sim = NetdevSim() + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") print("%s: OK" % (os.path.basename(__file__))) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 
960179882a1c..fb82d29ee863 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -29,6 +29,7 @@ #include <linux/filter.h> #include <linux/bpf_perf_event.h> #include <linux/bpf.h> +#include <linux/if_ether.h> #include <bpf/bpf.h> @@ -49,6 +50,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 4 +#define POINTER_VALUE 0xcafe4all +#define TEST_DATA_LEN 64 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -62,6 +65,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; + uint32_t retval; enum { UNDEF, ACCEPT, @@ -95,6 +99,94 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = -3, + }, + { + "DIV32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "empty prog", + .insns = { + }, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "only exit insn", + .insns = { + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, }, { "unreachable", @@ -210,6 +302,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "test8 ld_imm64", @@ -517,6 +610,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, + .retval = POINTER_VALUE, }, { "check valid spill/fill, skb mark", @@ -803,6 +897,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, + .retval = -ENOENT, }, { "jump test 4", @@ -1823,6 +1918,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 0xfaceb00c, }, { "PTR_TO_STACK store/load - bad alignment on off", @@ -1881,6 +1977,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr", + .retval = POINTER_VALUE, }, { "unpriv: add const to pointer", @@ -2054,6 +2151,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, 
BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -2594,6 +2692,29 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "context stores via ST", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_ST stores into R1 context is not allowed", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "context stores via XADD", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1, + BPF_REG_0, offsetof(struct __sk_buff, mark), 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_XADD stores into R1 context is not allowed", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "direct packet access: test1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -2818,6 +2939,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test12 (and, good access)", @@ -2842,6 +2964,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test13 (branches, good access)", @@ -2872,6 +2995,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", @@ -2895,6 +3019,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test15 (spill with xadd)", @@ -3181,6 +3306,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test28 (marking on <=, bad access)", @@ -4313,7 +4439,8 @@ static struct bpf_test tests[] = { .fixup_map1 = { 2 }, .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, - .result = ACCEPT, + .result = REJECT, + .errstr = "BPF_XADD stores into R1 context is not allowed", }, { "leak pointer into ctx 2", @@ -4327,7 +4454,8 @@ static struct bpf_test tests[] = { }, .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, - .result = ACCEPT, + .result = REJECT, + .errstr = "BPF_XADD stores into R1 context is not allowed", }, { "leak pointer into ctx 3", @@ -5798,6 +5926,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 0 /* csum_diff of 64-byte packet */, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -6166,6 +6295,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 42 /* ultimate return value */, }, { "ld_ind: check calling conv, r1", @@ -6237,6 +6367,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check bpf_perf_event_data->sample_period byte load permitted", @@ -6708,7 +6839,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map1 = { 4 }, - .errstr = "unbounded min value", + .errstr = "R0 invalid mem access 'inv'", .result = REJECT, }, { @@ -7224,6 +7355,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7244,6 
+7376,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7685,6 +7818,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = TEST_DATA_LEN, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -8610,6 +8744,127 @@ static struct bpf_test tests[] = { .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "check deducing bounds from const, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check deducing bounds from const, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check deducing bounds from const, 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "check deducing bounds from const, 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "check deducing bounds from const, 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + /* Marks reg as unknown. 
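+	 * (Note, added for clarity: the verifier cannot track bounds
+	 * through BPF_NEG, so R0 loses its known range and the pointer
+	 * subtraction below trips the unbounded-min-value check named
+	 * in errstr.)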
@@ -8610,6 +8744,127 @@ static struct bpf_test tests[] = {
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
+		"check deducing bounds from const, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check deducing bounds from const, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check deducing bounds from const, 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 7",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, ~0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"check deducing bounds from const, 8",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, ~0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"check deducing bounds from const, 9",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 10",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+			/* Marks reg as unknown. */
+			BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+	},
+	{
 		"bpf_exit with invalid return code. test1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
@@ -8705,6 +8960,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: overlapping caller/callee",
@@ -8900,6 +9156,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
 	},
 	{
 		"calls: callee using args1",
@@ -8912,6 +9169,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"calls: callee using wrong args2",
@@ -8942,6 +9200,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "allowed for root only",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
 	},
 	{
 		"calls: callee changing pkt pointers",
@@ -8990,6 +9249,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN,
 	},
 	{
 		"calls: calls with stack arith",
@@ -9008,6 +9268,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"calls: calls with misaligned stack access",
@@ -9041,6 +9302,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 43,
 	},
 	{
 		"calls: calls control flow, jump test 2",
@@ -9533,6 +9795,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"calls: write into callee stack frame",
@@ -10144,6 +10407,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = POINTER_VALUE,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 2",
@@ -10209,6 +10473,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 4",
@@ -10242,6 +10507,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
+		.retval = 1,
 	},
 	{
 		"calls: pkt_ptr spill into caller stack 5",
@@ -10650,10 +10916,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int fd_prog, expected_ret, reject_from_alignment;
 	struct bpf_insn *prog = test->insns;
 	int prog_len = probe_filter_length(prog);
+	char data_in[TEST_DATA_LEN] = {};
 	int prog_type = test->prog_type;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
-	int i;
+	uint32_t retval;
+	int i, err;
 
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
@@ -10696,6 +10964,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		}
 	}
 
+	if (fd_prog >= 0) {
+		err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in),
+					NULL, NULL, &retval, NULL);
+		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+			printf("Unexpected bpf_prog_test_run error\n");
+			goto fail_log;
+		}
+		if (!err && retval != test->retval &&
+		    test->retval != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, test->retval);
+			goto fail_log;
+		}
+	}
 	(*passes)++;
 	printf("OK%s\n", reject_from_alignment ?
 	       " (NOTE: reject due to unknown alignment)" : "");
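With the do_test_single() change above, the harness no longer stops at the
verifier verdict: every program that loads is also executed once over a zeroed
TEST_DATA_LEN-byte buffer via bpf_prog_test_run(), and the result is compared
against the new .retval field. A test whose .retval is POINTER_VALUE is exempt
from the exact comparison (actual pointer values vary from run to run), and
kernels lacking BPF_PROG_RUN support fail with ENOTSUPP (524) or EPERM, which
is tolerated rather than counted as a failure. Pulled out of the harness, the
call reduces to this sketch (names and the 64-byte buffer size are
illustrative):

    /* Illustrative use of bpf_prog_test_run() outside the harness;
     * 'prog_fd' is assumed to be an already loaded program. */
    unsigned char data[64] = {};	/* zeroed input packet */
    __u32 retval = 0;
    int err = bpf_prog_test_run(prog_fd, 1 /* one iteration */,
    			    data, sizeof(data),
    			    NULL, NULL,	/* no output buffer wanted */
    			    &retval, NULL /* duration not needed */);
    if (!err)
    	printf("program returned %u\n", retval);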
" (NOTE: reject due to unknown alignment)" : ""); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b4b69c2d1012..9dea96380339 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1310,7 +1310,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma) && !logging_active) { + if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 62310122ee78..743ca5cb05ef 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,9 +285,11 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; - ret = vgic_v4_init(kvm); - if (ret) - goto out; + if (vgic_has_its(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_enable(vcpu); diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 4a37292855bc..bc4265154bac 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -118,7 +118,7 @@ int vgic_v4_init(struct kvm *kvm) struct kvm_vcpu *vcpu; int i, nr_vcpus, ret; - if (!vgic_supports_direct_msis(kvm)) + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ if (dist->its_vm.vpes) |