diff options
585 files changed, 17293 insertions, 4689 deletions
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt index 42c3bb2d53e8..01e331a5f3e7 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt @@ -41,9 +41,9 @@ Required properties: Optional properties: In order to use the GPIO lines in PWM mode, some additional optional -properties are required. Only Armada 370 and XP support these properties. +properties are required. -- compatible: Must contain "marvell,armada-370-xp-gpio" +- compatible: Must contain "marvell,armada-370-gpio" - reg: an additional register set is needed, for the GPIO Blink Counter on/off registers. @@ -71,7 +71,7 @@ Example: }; gpio1: gpio@18140 { - compatible = "marvell,armada-370-xp-gpio"; + compatible = "marvell,armada-370-gpio"; reg = <0x18140 0x40>, <0x181c8 0x08>; reg-names = "gpio", "pwm"; ngpios = <17>; diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt index bbd083f5600a..1db6e0057a63 100644 --- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt +++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt @@ -31,7 +31,7 @@ Example: compatible = "st,stm32-timers"; reg = <0x40010000 0x400>; clocks = <&rcc 0 160>; - clock-names = "clk_int"; + clock-names = "int"; pwm { compatible = "st,stm32-pwm"; diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 1506e948610c..27966ae741e0 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -22,6 +22,7 @@ Required properties: Required elements: 'pclk', 'hclk' Optional elements: 'tx_clk' Optional elements: 'rx_clk' applies to cdns,zynqmp-gem + Optional elements: 'tsu_clk' - clocks: Phandles to input clocks. Optional properties for PHY child node: diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt index c627bbb3009e..60c833d62181 100644 --- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt +++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt @@ -13,14 +13,10 @@ Optional SoC Specific Properties: - pinctrl-names: Contains only one value - "default". - pintctrl-0: Specifies the pin control groups used for this controller. - autosuspend-delay: Specify autosuspend delay in milliseconds. -- vin-voltage-override: Specify voltage of VIN pin in microvolts. - irq-status-read-quirk: Specify that the trf7970a being used has the "IRQ Status Read" erratum. - en2-rf-quirk: Specify that the trf7970a being used has the "EN2 RF" erratum. -- t5t-rmb-extra-byte-quirk: Specify that the trf7970a has the erratum - where an extra byte is returned by Read Multiple Block commands issued - to Type 5 tags. - vdd-io-supply: Regulator specifying voltage for vdd-io - clock-frequency: Set to specify that the input frequency to the trf7970a is 13560000Hz or 27120000Hz @@ -37,15 +33,13 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1): spi-max-frequency = <2000000>; interrupt-parent = <&gpio2>; interrupts = <14 0>; - ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>, - <&gpio2 5 GPIO_ACTIVE_LOW>; + ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>, + <&gpio2 5 GPIO_ACTIVE_HIGH>; vin-supply = <&ldo3_reg>; - vin-voltage-override = <5000000>; vdd-io-supply = <&ldo2_reg>; autosuspend-delay = <30000>; irq-status-read-quirk; en2-rf-quirk; - t5t-rmb-extra-byte-quirk; clock-frequency = <27120000>; status = "okay"; }; diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 24196cef7c91..1fe42a874aae 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,9 +22,9 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. - ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s } + ip link add link <master-dev> name <slave-dev> type ipvlan mode { l2 | l3 | l3s } - e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 + e.g. ip link add link eth0 name ipvl0 type ipvlan mode l2 4. Operating modes: diff --git a/Documentation/networking/policy-routing.txt b/Documentation/networking/policy-routing.txt deleted file mode 100644 index 36f6936d7f21..000000000000 --- a/Documentation/networking/policy-routing.txt +++ /dev/null @@ -1,150 +0,0 @@ -Classes -------- - - "Class" is a complete routing table in common sense. - I.e. it is tree of nodes (destination prefix, tos, metric) - with attached information: gateway, device etc. - This tree is looked up as specified in RFC1812 5.2.4.3 - 1. Basic match - 2. Longest match - 3. Weak TOS. - 4. Metric. (should not be in kernel space, but they are) - 5. Additional pruning rules. (not in kernel space). - - We have two special type of nodes: - REJECT - abort route lookup and return an error value. - THROW - abort route lookup in this class. - - - Currently the number of classes is limited to 255 - (0 is reserved for "not specified class") - - Three classes are builtin: - - RT_CLASS_LOCAL=255 - local interface addresses, - broadcasts, nat addresses. - - RT_CLASS_MAIN=254 - all normal routes are put there - by default. - - RT_CLASS_DEFAULT=253 - if ip_fib_model==1, then - normal default routes are put there, if ip_fib_model==2 - all gateway routes are put there. - - -Rules ------ - Rule is a record of (src prefix, src interface, tos, dst prefix) - with attached information. - - Rule types: - RTP_ROUTE - lookup in attached class - RTP_NAT - lookup in attached class and if a match is found, - translate packet source address. - RTP_MASQUERADE - lookup in attached class and if a match is found, - masquerade packet as sourced by us. - RTP_DROP - silently drop the packet. - RTP_REJECT - drop the packet and send ICMP NET UNREACHABLE. - RTP_PROHIBIT - drop the packet and send ICMP COMM. ADM. PROHIBITED. - - Rule flags: - RTRF_LOG - log route creations. - RTRF_VALVE - One way route (used with masquerading) - -Default setup: - -root@amber:/pub/ip-routing # iproute -r -Kernel routing policy rules -Pref Source Destination TOS Iface Cl - 0 default default 00 * 255 - 254 default default 00 * 254 - 255 default default 00 * 253 - - -Lookup algorithm ----------------- - - We scan rules list, and if a rule is matched, apply it. - If a route is found, return it. - If it is not found or a THROW node was matched, continue - to scan rules. - -Applications ------------- - -1. Just ignore classes. All the routes are put into MAIN class - (and/or into DEFAULT class). - - HOWTO: iproute add PREFIX [ tos TOS ] [ gw GW ] [ dev DEV ] - [ metric METRIC ] [ reject ] ... (look at iproute utility) - - or use route utility from current net-tools. - -2. Opposite case. Just forget all that you know about routing - tables. Every rule is supplied with its own gateway, device - info. record. This approach is not appropriate for automated - route maintenance, but it is ideal for manual configuration. - - HOWTO: iproute addrule [ from PREFIX ] [ to PREFIX ] [ tos TOS ] - [ dev INPUTDEV] [ pref PREFERENCE ] route [ gw GATEWAY ] - [ dev OUTDEV ] ..... - - Warning: As of now the size of the routing table in this - approach is limited to 256. If someone likes this model, I'll - relax this limitation. - -3. OSPF classes (see RFC1583, RFC1812 E.3.3) - Very clean, stable and robust algorithm for OSPF routing - domains. Unfortunately, it is not widely used in the Internet. - - Proposed setup: - 255 local addresses - 254 interface routes - 253 ASE routes with external metric - 252 ASE routes with internal metric - 251 inter-area routes - 250 intra-area routes for 1st area - 249 intra-area routes for 2nd area - etc. - - Rules: - iproute addrule class 253 - iproute addrule class 252 - iproute addrule class 251 - iproute addrule to a-prefix-for-1st-area class 250 - iproute addrule to another-prefix-for-1st-area class 250 - ... - iproute addrule to a-prefix-for-2nd-area class 249 - ... - - Area classes must be terminated with reject record. - iproute add default reject class 250 - iproute add default reject class 249 - ... - -4. The Variant Router Requirements Algorithm (RFC1812 E.3.2) - Create 16 classes for different TOS values. - It is a funny, but pretty useless algorithm. - I listed it just to show the power of new routing code. - -5. All the variety of combinations...... - - -GATED ------ - - Gated does not understand classes, but it will work - happily in MAIN+DEFAULT. All policy routes can be set - and maintained manually. - -IMPORTANT NOTE --------------- - route.c has a compilation time switch CONFIG_IP_LOCAL_RT_POLICY. - If it is set, locally originated packets are routed - using all the policy list. This is not very convenient and - pretty ambiguous when used with NAT and masquerading. - I set it to FALSE by default. - - -Alexey Kuznetov diff --git a/MAINTAINERS b/MAINTAINERS index f81e1b765353..5bebe20811c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2967,7 +2967,7 @@ F: sound/pci/oxygen/ C6X ARCHITECTURE M: Mark Salter <[email protected]> -M: Aurelien Jacquiot <[email protected]> +M: Aurelien Jacquiot <[email protected]> W: http://www.linux-c6x.org/wiki/index.php/Main_Page S: Maintained @@ -8330,6 +8330,16 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h +MELLANOX ETHERNET INNOVA IPSEC DRIVER +M: Ilan Tayari <[email protected]> +R: Boris Pismenny <[email protected]> +S: Supported +W: http://www.mellanox.com +Q: http://patchwork.ozlabs.org/project/netdev/list/ +F: drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/* +F: drivers/net/ethernet/mellanox/mlx5/core/ipsec* + MELLANOX ETHERNET SWITCH DRIVERS M: Jiri Pirko <[email protected]> M: Ido Schimmel <[email protected]> @@ -9072,9 +9082,6 @@ F: include/uapi/linux/nfc.h F: drivers/nfc/ F: include/linux/platform_data/nfcmrvl.h F: include/linux/platform_data/nxp-nci.h -F: include/linux/platform_data/pn544.h -F: include/linux/platform_data/st21nfca.h -F: include/linux/platform_data/st-nci.h F: Documentation/devicetree/bindings/net/nfc/ NFS, SUNRPC, AND LOCKD CLIENTS @@ -11409,6 +11416,14 @@ F: kernel/time/alarmtimer.c F: kernel/time/ntp.c F: tools/testing/selftests/timers/ +TI TRF7970A NFC DRIVER +M: Mark Greer <[email protected]> +L: [email protected] (moderated for non-subscribers) +S: Supported +F: drivers/nfc/trf7970a.c +F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt + SC1200 WDT DRIVER M: Zwane Mwaikambo <[email protected]> S: Maintained @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* @@ -1437,7 +1437,7 @@ help: @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build' @echo ' make V=2 [targets] 2 => give reason for rebuild of target' @echo ' make O=dir [targets] Locate all output files in "dir", including .config' - @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' + @echo ' make C=1 [targets] Check re-compiled c source with $$CHECK (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' @echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where' diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 6e1242da0159..4104a0839214 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -86,8 +86,6 @@ struct task_struct; #define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) #define TSK_K_FP(tsk) TSK_K_REG(tsk, 0) -#define thread_saved_pc(tsk) TSK_K_BLINK(tsk) - extern void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4c1a35f15838..c0fcab6a5504 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1416,6 +1416,7 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" config VMSPLIT_3G_OPT + depends on !ARM_LPAE bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index 3f7d1b74c5e0..a17ca8d78656 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -17,7 +17,8 @@ @ there. .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000 #else - W(mov) r0, r0 + AR_CLASS( mov r0, r0 ) + M_CLASS( nop.w ) #endif .endm diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index dd3525a0f06a..9e8b082c134f 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -57,7 +57,6 @@ aliases { serial0 = &uart0; /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ - ethernet0 = &emac; ethernet1 = &xr819; }; @@ -104,13 +103,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts index 78f6c24952dd..8d2cc6e9a03f 100644 --- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts +++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts @@ -46,10 +46,3 @@ model = "FriendlyARM NanoPi NEO"; compatible = "friendlyarm,nanopi-neo", "allwinner,sun8i-h3"; }; - -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts index cedd326b6089..5b6d14555b7c 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts @@ -54,7 +54,6 @@ aliases { serial0 = &uart0; /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ - ethernet0 = &emac; ethernet1 = &rtl8189; }; @@ -109,13 +108,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts index 6880268e8b87..5fea430e0eb1 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts @@ -52,7 +52,6 @@ compatible = "xunlong,orangepi-one", "allwinner,sun8i-h3"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -98,13 +97,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index a10281b455f5..8b93f5c781a7 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts index 52e65755c51a..f148111c326d 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts @@ -52,7 +52,6 @@ compatible = "xunlong,orangepi-pc", "allwinner,sun8i-h3"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -110,13 +109,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index a6d4fda544e1..d4f600dbb7eb 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -83,12 +83,6 @@ #size-cells = <1>; ranges; - syscon: syscon@1c00000 { - compatible = "allwinner,sun8i-h3-system-controller", - "syscon"; - reg = <0x01c00000 0x1000>; - }; - dma: dma-controller@01c02000 { compatible = "allwinner,sun8i-h3-dma"; reg = <0x01c02000 0x1000>; @@ -285,14 +279,6 @@ interrupt-controller; #interrupt-cells = <3>; - emac_rgmii_pins: emac0 { - pins = "PD0", "PD1", "PD2", "PD3", "PD4", - "PD5", "PD7", "PD8", "PD9", "PD10", - "PD12", "PD13", "PD15", "PD16", "PD17"; - function = "emac"; - drive-strength = <40>; - }; - i2c0_pins: i2c0 { pins = "PA11", "PA12"; function = "i2c0"; @@ -389,32 +375,6 @@ clocks = <&osc24M>; }; - emac: ethernet@1c30000 { - compatible = "allwinner,sun8i-h3-emac"; - syscon = <&syscon>; - reg = <0x01c30000 0x104>; - interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "macirq"; - resets = <&ccu RST_BUS_EMAC>; - reset-names = "stmmaceth"; - clocks = <&ccu CLK_BUS_EMAC>; - clock-names = "stmmaceth"; - #address-cells = <1>; - #size-cells = <0>; - status = "disabled"; - - mdio: mdio { - #address-cells = <1>; - #size-cells = <0>; - int_mii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - clocks = <&ccu CLK_BUS_EPHY>; - resets = <&ccu RST_BUS_EPHY>; - }; - }; - }; - spi0: spi@01c68000 { compatible = "allwinner,sun8i-h3-spi"; reg = <0x01c68000 0x1000>; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 6da6af8881f7..2685e03600b1 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -257,7 +257,6 @@ CONFIG_SMSC911X=y CONFIG_STMMAC_ETH=y CONFIG_STMMAC_PLATFORM=y CONFIG_DWMAC_DWC_QOS_ETH=y -CONFIG_DWMAC_SUN8I=y CONFIG_TI_CPSW=y CONFIG_XILINX_EMACLITE=y CONFIG_AT803X_PHY=y diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 504e02238031..5cd5dd70bc83 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -40,7 +40,6 @@ CONFIG_ATA=y CONFIG_AHCI_SUNXI=y CONFIG_NETDEVICES=y CONFIG_SUN4I_EMAC=y -CONFIG_DWMAC_SUN8I=y # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 32e1a9513dc7..4e80bf7420d4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -315,7 +315,7 @@ static void __init cacheid_init(void) if (arch >= CPU_ARCH_ARMv6) { unsigned int cachetype = read_cpuid_cachetype(); - if ((arch == CPU_ARCH_ARMv7M) && !cachetype) { + if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) { cacheid = 0; } else if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 0d1f026d831a..6872135d7f84 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -67,14 +67,6 @@ }; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; - phy-handle = <&ext_rgmii_phy>; - status = "okay"; -}; - &i2c1 { pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; @@ -85,13 +77,6 @@ bias-pull-up; }; -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index 24f1aac366d6..790d14daaa6a 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -46,20 +46,5 @@ model = "Pine64+"; compatible = "pine64,pine64-plus", "allwinner,sun50i-a64"; - /* TODO: Camera, touchscreen, etc. */ -}; - -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; - phy-handle = <&ext_rgmii_phy>; - status = "okay"; -}; - -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; + /* TODO: Camera, Ethernet PHY, touchscreen, etc. */ }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 3b491c0e3b0d..c680ed385da3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -70,15 +70,6 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rmii_pins>; - phy-mode = "rmii"; - phy-handle = <&ext_rmii_phy1>; - status = "okay"; - -}; - &i2c1 { pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; @@ -89,13 +80,6 @@ bias-pull-up; }; -&mdio { - ext_rmii_phy1: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 769ced01a998..166c9ef884dc 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -129,12 +129,6 @@ #size-cells = <1>; ranges; - syscon: syscon@1c00000 { - compatible = "allwinner,sun50i-a64-system-controller", - "syscon"; - reg = <0x01c00000 0x1000>; - }; - mmc0: mmc@1c0f000 { compatible = "allwinner,sun50i-a64-mmc"; reg = <0x01c0f000 0x1000>; @@ -287,21 +281,6 @@ bias-pull-up; }; - rmii_pins: rmii_pins { - pins = "PD10", "PD11", "PD13", "PD14", "PD17", - "PD18", "PD19", "PD20", "PD22", "PD23"; - function = "emac"; - drive-strength = <40>; - }; - - rgmii_pins: rgmii_pins { - pins = "PD8", "PD9", "PD10", "PD11", "PD12", - "PD13", "PD15", "PD16", "PD17", "PD18", - "PD19", "PD20", "PD21", "PD22", "PD23"; - function = "emac"; - drive-strength = <40>; - }; - uart0_pins_a: uart0@0 { pins = "PB8", "PB9"; function = "uart0"; @@ -406,26 +385,6 @@ #size-cells = <0>; }; - emac: ethernet@1c30000 { - compatible = "allwinner,sun50i-a64-emac"; - syscon = <&syscon>; - reg = <0x01c30000 0x100>; - interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "macirq"; - resets = <&ccu RST_BUS_EMAC>; - reset-names = "stmmaceth"; - clocks = <&ccu CLK_BUS_EMAC>; - clock-names = "stmmaceth"; - status = "disabled"; - #address-cells = <1>; - #size-cells = <0>; - - mdio: mdio { - #address-cells = <1>; - #size-cells = <0>; - }; - }; - gic: interrupt-controller@1c81000 { compatible = "arm,gic-400"; reg = <0x01c81000 0x1000>, diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d789858c4f1b..97c123e09e45 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -191,7 +191,6 @@ CONFIG_RAVB=y CONFIG_SMC91X=y CONFIG_SMSC911X=y CONFIG_STMMAC_ETH=m -CONFIG_DWMAC_SUN8I=m CONFIG_MDIO_BUS_MUX_MMIOREG=y CONFIG_MESON_GXL_PHY=m CONFIG_MICREL_PHY=y diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..d0cb007fa482 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk) /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; vdso_data->raw_time_sec = tk->raw_time.tv_sec; - vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; + vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec << + tk->tkr_raw.shift) + + tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - /* tkr_raw.xtime_nsec == 0 */ vdso_data->cs_mono_mult = tk->tkr_mono.mult; vdso_data->cs_raw_mult = tk->tkr_raw.mult; /* tkr_mono.shift == tkr_raw.shift */ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index e00b4671bd7c..76320e920965 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -256,7 +256,6 @@ monotonic_raw: seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ - lsl x14, x14, x12 get_nsec_per_sec res=x9 lsl x9, x9, x12 diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 85d4af97c986..dbdbb8a558df 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Return saved PC of a blocked thread. - */ -#define thread_saved_pc(tsk) (tsk->thread.pc) - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index b9eb3da7f278..7c87b5be53b5 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task) #define release_segments(mm) do { } while (0) /* - * saved PC of a blocked thread. - */ -#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc) - -/* * saved kernel SP and DP of a blocked thread. */ #ifdef _BIG_ENDIAN diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index e299d30105b5..a2cdb1521aca 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -69,14 +69,6 @@ void hard_reset_now (void) while(1) /* waiting for RETRIBUTION! */ ; } -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - return task_pt_regs(t)->irp; -} - /* setup the child's kernel stack with a pt_regs and switch_stack on it. * it will be un-nested during _resume and _ret_from_sys_call when the * new thread is scheduled. diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c530a8fa87ce..fe87b383fbf3 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -85,14 +85,6 @@ hard_reset_now(void) } /* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - return task_pt_regs(t)->erp; -} - -/* * Setup the child's kernel stack with a pt_regs and call switch_stack() on it. * It will be unnested during _resume and _ret_from_sys_call when the new thread * is scheduled. diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 15b815df29c1..bc2729e4b2c9 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* Free all resources held by a thread. */ static inline void release_thread(struct task_struct *dead_task) { diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index ddaeb9cc9143..e4d08d74ed9f 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ... #define release_segments(mm) do { } while (0) #define forget_segments() do { } while (0) -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc) diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 5a4c92abc99e..a957b374e3a6 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(tsk->thread.pc)) - return ((unsigned long *)tsk->thread.fp)[2]; - else - return tsk->thread.pc; -} - int elf_check_arch(const struct elf32_hdr *hdr) { unsigned long hsr0 = __get_HSR(0); diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 65132d7ae9e5..afa53147e66a 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0f5db5bb561b..d1ddcabbbe83 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags, return 0; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((struct pt_regs *)tsk->thread.esp0)->pc; -} - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, pc; diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 45a825402f63..ce67940860a5 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -33,9 +33,6 @@ /* task_struct, defined elsewhere, is the "process descriptor" */ struct task_struct; -/* this is defined in arch/process.c */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *, unsigned long, unsigned long); /* diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index de715bab7956..656050c2e6a0 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -61,14 +61,6 @@ void arch_cpu_idle(void) } /* - * Return saved PC of a blocked thread - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return 0; -} - -/* * Copy architecture-specific thread state */ int copy_thread(unsigned long clone_flags, unsigned long usp, diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 26a63d69c599..ab982f07ea68 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat) } /* - * Return saved PC of a blocked thread. - * Note that the only way T can block is through a call to schedule() -> switch_to(). - */ -static inline unsigned long -thread_saved_pc (struct task_struct *t) -{ - struct unw_frame_info info; - unsigned long ip; - - unw_init_from_blocked_task(&info, t); - if (unw_unwind(&info) < 0) - return 0; - unw_get_ip(&info, &ip); - return ip; -} - -/* * Get the current instruction/program counter value. */ #define current_text_addr() \ diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 5767367550c6..657874eeeccc 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *); extern void copy_segments(struct task_struct *p, struct mm_struct * mm); extern void release_segments(struct mm_struct * mm); -extern unsigned long thread_saved_pc(struct task_struct *); - /* Copy and release all segment info associated with a VM */ #define copy_segments(p, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index d8ffcfec599c..8cd7e03f4370 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -39,14 +39,6 @@ #include <linux/err.h> -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return tsk->thread.lr; -} - void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index 77239e81379b..94c36030440c 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index e475c945c8b2..7df92f8b0781 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -40,20 +40,6 @@ asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); - -/* - * Return saved PC from a blocked thread - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp; - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(sw->retpc)) - return ((unsigned long *)sw->a6)[1]; - else - return sw->retpc; -} - void arch_cpu_idle(void) { #if defined(MACH_ATARI_ONLY) diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 37ef196e4519..330d556860ba 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -extern unsigned long thread_saved_pc(struct task_struct *t); - extern unsigned long get_wchan(struct task_struct *p); # define KSTK_EIP(tsk) (0) @@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Return saved (kernel) PC of a blocked thread. */ -# define thread_saved_pc(tsk) \ - ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0) - unsigned long get_wchan(struct task_struct *p); /* The size allocated for kernel stacks. This _must_ be a power of two! */ diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index e92a817e645f..6527ec22f158 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -#ifndef CONFIG_MMU -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct cpu_context *ctx = - &(((struct thread_info *)(tsk->stack))->cpu_context); - - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(ctx->r15)) - return (unsigned long)ctx->r15; - else - return ctx->r14; -} -#endif - unsigned long get_wchan(struct task_struct *p) { /* TBD (used by procfs) */ diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 7c6336dd2638..7cd92166a0b9 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi) int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, bool user, bool kernel) { - int idx_user, idx_kernel; + /* + * Initialize idx_user and idx_kernel to workaround bogus + * maybe-initialized warning when using GCC 6. + */ + int idx_user = 0, idx_kernel = 0; unsigned long flags, old_entryhi; local_irq_save(flags); diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 18e17abf7664..3ae479117b42 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs, /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(task) ((task)->thread.uregs) diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index c9fa42619c6a..89e8027e07fb 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -40,14 +40,6 @@ #include "internal.h" /* - * return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *) tsk->thread.sp)[3]; -} - -/* * power off function, if any */ void (*pm_power_off)(void); diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 3bbbc3d798e5..4944e2e1d8b0 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Return saved PC of a blocked thread. */ -#define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea) - extern unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index a908e6c30a00..396d8f306c21 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -/* - * Return saved PC of a blocked thread. For now, this is the "user" PC - */ -extern unsigned long thread_saved_pc(struct task_struct *t); - #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 106859ae27ff..f9b77003f113 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs) show_registers(regs); } -unsigned long thread_saved_pc(struct task_struct *t) -{ - return (unsigned long)user_regs(t->stack)->pc; -} - void release_thread(struct task_struct *dead_task) { } diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index a3661ee6b060..4c6694b4e77e 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -163,12 +163,7 @@ struct thread_struct { .flags = 0 \ } -/* - * Return saved PC of a blocked thread. This is used by ps mostly. - */ - struct task_struct; -unsigned long thread_saved_pc(struct task_struct *t); void show_trace(struct task_struct *task, unsigned long *stack); /* diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 4516a5b53f38..b64d7d21646e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -unsigned long thread_saved_pc(struct task_struct *t) -{ - return t->thread.regs.kpc; -} - unsigned long get_wchan(struct task_struct *p) { diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index a83821f33ea3..8814a7249ceb 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); +extern int is_current_kprobe_addr(unsigned long addr); #ifdef CONFIG_KPROBES_ON_FTRACE extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bb99b651085a..1189d04f3bd1 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -378,12 +378,6 @@ struct thread_struct { } #endif -/* - * Return saved PC of a blocked thread. For now, this is the "user" PC - */ -#define thread_saved_pc(tsk) \ - ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs) unsigned long get_wchan(struct task_struct *p); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae418b85c17c..b886795060fd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1411,10 +1411,8 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 - andis. r0,r4,0xa410 /* weird error? */ + andis. r0,r4,0xa450 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ - andis. r0,r4,DSISR_DABRMATCH@h - bne- handle_dabr_fault CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ @@ -1438,11 +1436,16 @@ do_hash_page: /* Error */ blt- 13f + + /* Reload DSISR into r4 for the DABR check below */ + ld r4,_DSISR(r1) #endif /* CONFIG_PPC_STD_MMU_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: -11: ld r4,_DAR(r1) +11: andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fc4343514bed..01addfb0ed0a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; +int is_current_kprobe_addr(unsigned long addr) +{ + struct kprobe *p = kprobe_running(); + return (p && (unsigned long)p->addr == addr) ? 1 : 0; +} + bool arch_within_kprobe_blacklist(unsigned long addr) { return (addr >= (unsigned long)__kprobes_text_start && @@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); + return 1; } NOKPROBE_SYMBOL(setjmp_pre_handler); @@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) * saved regs... */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); preempt_enable_no_resched(); return 1; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a8c1f99e9607..4640f6d64f8b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void) #endif /* + * Emergency stacks are used for a range of things, from asynchronous + * NMIs (system reset, machine check) to synchronous, process context. + * We set preempt_count to zero, even though that isn't necessarily correct. To + * get the right value we'd need to copy it from the previous thread_info, but + * doing that might fault causing more problems. + * TODO: what to do with accounting? + */ +static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) +{ + ti->task = NULL; + ti->cpu = cpu; + ti->preempt_count = 0; + ti->local_flags = 0; + ti->flags = 0; + klp_init_thread_info(ti); +} + +/* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. @@ -633,24 +651,31 @@ void __init emergency_stack_init(void) * Since we use these as temporary stacks during secondary CPU * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. + * + * The IRQ stacks allocated elsewhere in this file are zeroed and + * initialized in kernel/irq.c. These are initialized here in order + * to have emergency stacks available as early as possible. */ limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { struct thread_info *ti; ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 7c933a99f5d5..c98e90b4ea7b 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller) stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) + SAVE_GPR(0, r1) + SAVE_10GPRS(2, r1) + SAVE_10GPRS(12, r1) + SAVE_10GPRS(22, r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + std r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -95,18 +99,44 @@ ftrace_call: bl ftrace_stub nop - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 + /* Load the possibly modified NIP */ + ld r15, _NIP(r1) + #ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ + cmpd r14, r15 /* has NIP been altered? */ +#endif + +#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE) + /* NIP has not been altered, skip over further checks */ + beq 1f + + /* Check if there is an active kprobe on us */ + subi r3, r14, 4 + bl is_current_kprobe_addr + nop + + /* + * If r3 == 1, then this is a kprobe/jprobe. + * else, this is livepatched function. + * + * The conditional branch for livepatch_handler below will use the + * result of this comparison. For kprobe/jprobe, we just need to branch to + * the new NIP, not call livepatch_handler. The branch below is bne, so we + * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want + * CR0[EQ] = (r3 == 1). + */ + cmpdi r3, 1 +1: #endif + /* Load CTR with the possibly modified NIP */ + mtctr r15 + /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -119,7 +149,10 @@ ftrace_call: addi r1, r1, SWITCH_FRAME_SIZE #ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ + /* + * Based on the cmpd or cmpdi above, if the NIP was altered and we're + * not on a kprobe/jprobe, then handle livepatch. + */ bne- livepatch_handler #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 42b7a4fd57d9..8d1a365b8edc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; case KVM_REG_PPC_TB_OFFSET: + /* + * POWER9 DD1 has an erratum where writing TBU40 causes + * the timebase to lose ticks. So we don't let the + * timebase offset be changed on P9 DD1. (It is + * initialized to zero.) + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + break; /* round up to multiple of 2^24 */ vcpu->arch.vcore->tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); @@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } + /* + * Don't allow entry with a suspended transaction, because + * the guest entry/exit code will lose it. + * If the guest has TM enabled, save away their TM-related SPRs + * (they will get restored by the TM unavailable interrupt). + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + (current->thread.regs->msr & MSR_TM)) { + if (MSR_TM_ACTIVE(current->thread.regs->msr)) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = 0; + return -EINVAL; + } + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif + kvmppc_core_prepare_to_enter(vcpu); /* No need to go into the guest when all we'll do is come back out */ @@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_all_to_thread(current); + /* Save userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); + } + user_vrsave = mfspr(SPRN_VRSAVE); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); + /* Restore userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, current->thread.fscr); + } + mtspr(SPRN_VRSAVE, user_vrsave); + out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0fdc4a28970b..404deb512844 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Put whatever is in the decrementer into the * hypervisor decrementer. */ +BEGIN_FTR_SECTION + ld r5, HSTATE_KVM_VCORE(r13) + ld r6, VCORE_KVM(r5) + ld r9, KVM_HOST_LPCR(r6) + andis. r9, r9, LPCR_LD@h +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mfspr r8,SPRN_DEC mftb r7 - mtspr SPRN_HDEC,r8 +BEGIN_FTR_SECTION + /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */ + bne 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r8,r8 +32: mtspr SPRN_HDEC,r8 add r8,r8,r7 std r8,HSTATE_DECEXP(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bdb3f76ceb6b..4888dd494604 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,12 +32,29 @@ #include <asm/opal.h> #include <asm/xive-regs.h> +/* Sign-extend HDEC if not on POWER9 */ +#define EXTEND_HDEC(reg) \ +BEGIN_FTR_SECTION; \ + extsw reg, reg; \ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 144 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_TID (SFS-16) +#define STACK_SLOT_PSSCR (SFS-24) +#define STACK_SLOT_PID (SFS-32) +#define STACK_SLOT_IAMR (SFS-40) +#define STACK_SLOT_CIABR (SFS-48) +#define STACK_SLOT_DAWR (SFS-56) +#define STACK_SLOT_DAWRX (SFS-64) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ + /* HDEC may be larger than DEC for arch >= v3.00, but since the */ + /* HDEC value came from DEC in the first place, it will fit */ mfspr r3, SPRN_HDEC mtspr SPRN_DEC, r3 /* @@ -295,8 +314,9 @@ kvm_novcpu_wakeup: /* See if our timeslice has expired (HDEC is negative) */ mfspr r0, SPRN_HDEC + EXTEND_HDEC(r0) li r12, BOOK3S_INTERRUPT_HV_DECREMENTER - cmpwi r0, 0 + cmpdi r0, 0 blt kvm_novcpu_exit /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ @@ -319,10 +339,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -390,8 +410,8 @@ kvm_secondary_got_guest: lbz r4, HSTATE_PTID(r13) cmpwi r4, 0 bne 63f - lis r6, 0x7fff - ori r6, r6, 0xffff + LOAD_REG_ADDR(r6, decrementer_max) + ld r6, 0(r6) mtspr SPRN_HDEC, r6 /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) @@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ -/* Stack frame offsets */ -#define STACK_SLOT_TID (112-16) -#define STACK_SLOT_PSSCR (112-24) -#define STACK_SLOT_PID (112-32) - .global kvmppc_hv_entry kvmppc_hv_entry: @@ -565,7 +580,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -749,10 +764,20 @@ BEGIN_FTR_SECTION mfspr r5, SPRN_TIDR mfspr r6, SPRN_PSSCR mfspr r7, SPRN_PID + mfspr r8, SPRN_IAMR std r5, STACK_SLOT_TID(r1) std r6, STACK_SLOT_PSSCR(r1) std r7, STACK_SLOT_PID(r1) + std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC - cmpwi r3, 512 /* 1 microsecond */ + EXTEND_HDEC(r3) + cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon #ifdef CONFIG_KVM_XICS @@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) * set by the guest could disrupt the host. */ li r0, 0 - mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 BEGIN_FTR_SECTION + mtspr SPRN_IAMR, r0 mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 @@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR @@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Restore host values of some registers */ BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) ld r7, STACK_SLOT_PID(r1) + ld r8, STACK_SLOT_IAMR(r1) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 mtspr SPRN_PID, r7 + mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT @@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr @@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) mfspr r3, SPRN_DEC mfspr r4, SPRN_HDEC mftb r5 - cmpw r3, r4 + extsw r3, r3 + EXTEND_HDEC(r4) + cmpd r3, r4 ble 67f mtspr SPRN_DEC, r4 67: /* save expiry time of guest decrementer */ - extsw r3, r3 add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index cbd82fde5770..09ceea6175ba 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs_user_copy) { regs_user->regs = task_pt_regs(current); - regs_user->abi = perf_reg_abi(current); + regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : + PERF_SAMPLE_REGS_ABI_NONE; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index e6f444b46207..b5d960d6db3d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, return mmio_atsd_reg; } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) { unsigned long launch; @@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + /* Invalidating the entire process doesn't use a va */ return mmio_launch_invalidate(npu, launch, 0); } static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid) + unsigned long pid, bool flush) { unsigned long launch; @@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + return mmio_launch_invalidate(npu, launch, va); } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + +static void mmio_invalidate_wait( + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) +{ + struct npu *npu; + int i, reg; + + /* Wait for all invalidations to complete */ + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* Wait for completion */ + npu = mmio_atsd_reg[i].npu; + reg = mmio_atsd_reg[i].reg; + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) + cpu_relax(); + + put_mmio_atsd_reg(npu, reg); + + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. + */ + if (flush) + mmio_invalidate_pid(npu, 0, true); + } +} + /* * Invalidate either a single address or an entire PID depending on * the value of va. */ static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address) + unsigned long address, bool flush) { - int i, j, reg; + int i, j; struct npu *npu; struct pnv_phb *nphb; struct pci_dev *npdev; - struct { - struct npu *npu; - int reg; - } mmio_atsd_reg[NV_MAX_NPUS]; + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; /* @@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, if (va) mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid); + mmio_invalidate_va(npu, address, pid, + flush); else mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid); + mmio_invalidate_pid(npu, pid, flush); /* * The NPU hardware forwards the shootdown to all GPUs @@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, */ flush_tlb_mm(npu_context->mm); - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - put_mmio_atsd_reg(npu, reg); - } + mmio_invalidate_wait(mmio_atsd_reg, flush); + if (flush) + /* Wait for the flush to complete */ + mmio_invalidate_wait(mmio_atsd_reg, false); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, * There should be no more translation requests for this PID, but we * need to ensure any entries for it are removed from the TLB. */ - mmio_invalidate(npu_context, 0, 0); + mmio_invalidate(npu_context, 0, 0, true); } static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, @@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, @@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, @@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - for (address = start; address <= end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address); + for (address = start; address < end; address += PAGE_SIZE) + mmio_invalidate(npu_context, 1, address, false); + + /* Do the flush only on the final addess == end */ + mmio_invalidate(npu_context, 1, address, true); } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { @@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); - if (!mm) { - /* kernel thread contexts are not supported */ + if (!mm || mm->context.id == 0) { + /* + * Kernel thread contexts are not supported and context id 0 is + * reserved on the GPU. + */ return ERR_PTR(-EINVAL); } diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 60d395fdc864..aeac013968f2 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *); /* Free guarded storage control block for current */ void exit_thread_gs(void); -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *t); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ (task_stack_page(tsk) + THREAD_SIZE) - 1) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index e545ffe5155a..8e622bb52f7a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,8 +564,6 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) - diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); @@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - if (MACHINE_IS_LPAR) - diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); - else - diag308(DIAG308_LOAD_CLEAR, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 999d7154bbdc..bb32b8618bf6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -41,31 +41,6 @@ asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); -/* - * Return saved PC of a blocked thread. used in kernel/sched. - * resume in entry.S does not create a new stack frame, it - * just stores the registers %r6-%r15 to the frame given by - * schedule. We want to return the address of the caller of - * schedule, so we have to walk the backchain one time to - * find the frame schedule() store its return address. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct stack_frame *sf, *low, *high; - - if (!tsk || !task_stack_page(tsk)) - return 0; - low = task_stack_page(tsk); - high = (struct stack_frame *) task_pt_regs(tsk); - sf = (struct stack_frame *) tsk->thread.ksp; - if (sf <= low || sf > high) - return 0; - sf = (struct stack_frame *) sf->back_chain; - if (sf <= low || sf > high) - return 0; - return sf->gprs[8]; -} - extern void kernel_thread_starter(void); /* diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9da243d94cc3..3b297fa3aa67 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, ptr = asce.origin * 4096; if (asce.r) { *fake = 1; + ptr = 0; asce.dt = ASCE_TYPE_REGION1; } switch (asce.dt) { case ASCE_TYPE_REGION1: - if (vaddr.rfx01 > asce.tl && !asce.r) + if (vaddr.rfx01 > asce.tl && !*fake) return PGM_REGION_FIRST_TRANS; break; case ASCE_TYPE_REGION2: @@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, union region1_table_entry rfte; if (*fake) { - /* offset in 16EB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); + ptr += (unsigned long) vaddr.rfx << 53; rfte.val = ptr; goto shadow_r2t; } @@ -1036,8 +1036,7 @@ shadow_r2t: union region2_table_entry rste; if (*fake) { - /* offset in 8PB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); + ptr += (unsigned long) vaddr.rsx << 42; rste.val = ptr; goto shadow_r3t; } @@ -1064,8 +1063,7 @@ shadow_r3t: union region3_table_entry rtte; if (*fake) { - /* offset in 4TB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 31UL); + ptr += (unsigned long) vaddr.rtx << 31; rtte.val = ptr; goto shadow_sgt; } @@ -1101,8 +1099,7 @@ shadow_sgt: union segment_table_entry ste; if (*fake) { - /* offset in 2G guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 20UL); + ptr += (unsigned long) vaddr.sx << 20; ste.val = ptr; goto shadow_pgt; } diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index d9a922d8711b..299274581968 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -13,7 +13,6 @@ struct task_struct; */ extern void (*cpu_wait)(void); -extern unsigned long thread_saved_pc(struct task_struct *tsk); extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); extern unsigned long get_wchan(struct task_struct *p); diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index eb64d7a677cb..6e20241a1ed4 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r) return 1; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return task_pt_regs(tsk)->cp0_epc; -} - unsigned long get_wchan(struct task_struct *task) { if (!task || task == current || task->state == TASK_RUNNING) diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index dd27159819eb..b395e5620c0b 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -67,9 +67,6 @@ struct thread_struct { .current_ds = KERNEL_DS, \ } -/* Return saved PC of a blocked thread. */ -unsigned long thread_saved_pc(struct task_struct *t); - /* Do necessary setup to start up a newly executed thread. */ static inline void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index b58ee9018433..f04dc5a43062 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -89,9 +89,7 @@ struct thread_struct { #include <linux/types.h> #include <asm/fpumacro.h> -/* Return saved PC of a blocked thread. */ struct task_struct; -unsigned long thread_saved_pc(struct task_struct *); /* On Uniprocessor, even in RMO processes see TSO semantics */ #ifdef CONFIG_SMP diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index b6dac8e980f0..9245f93398c7 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) } /* - * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return task_thread_info(tsk)->kpc; -} - -/* * Free current thread data structures etc.. */ void exit_thread(struct task_struct *tsk) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 1badc493e62e..b96104da5bd6 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init); #endif -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - unsigned long ret = 0xdeadbeefUL; - - if (ti && ti->ksp) { - unsigned long *sp; - sp = (unsigned long *)(ti->ksp + STACK_BIAS); - if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL && - sp[14]) { - unsigned long *fp; - fp = (unsigned long *)(sp[14] + STACK_BIAS); - if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL) - ret = fp[15]; - } - } - return ret; -} - /* Free current thread data structures etc.. */ void exit_thread(struct task_struct *tsk) { diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 0bc9968b97a1..f71e5206650b 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task) extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags); - -/* - * Return saved (kernel) PC of a blocked thread. - * Only used in a printk() in kernel/sched/core.c, so don't work too hard. - */ -#define thread_saved_pc(t) ((t)->thread.pc) - unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 2d1e0dd5bb0b..f6d1a3f747a9 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task) { } -extern unsigned long thread_saved_pc(struct task_struct *t); - static inline void mm_copy_segments(struct mm_struct *from_mm, struct mm_struct *new_mm) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 64a1fd06f3fd..7b5640117325 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -56,12 +56,6 @@ union thread_union cpu0_irqstack __attribute__((__section__(".data..init_irqstack"))) = { INIT_THREAD_INFO(init_task) }; -unsigned long thread_saved_pc(struct task_struct *task) -{ - /* FIXME: Need to look up userspace_pid by cpu */ - return os_process_pc(userspace_pid[0]); -} - /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a6d91d4e37a1..110ce8238466 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ }, [ C(OP_PREFETCH) ] = { [ C(RESULT_ACCESS) ] = 0x0, diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 055962615779..722d0e568863 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -296,6 +296,7 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ + bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; struct x86_exception exception; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fba100713924..d5acc27ed1cc 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,8 +2,7 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/clocksource.h> +#include <linux/atomic.h> #include <asm/hyperv.h> /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3cada998a402..a28b671f1549 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0bb88428cbf2..3ca198080ea9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } /* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct inactive_task_frame *frame = - (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); - return READ_ONCE_NOCHECK(frame->ret_addr); -} - -/* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully * because the task might wake up and we might look at a stack diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0816ab2e8adc..80890dee66ce 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 87d3cb901935..0e846f0cb83b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; + ctxt->eip = kvm_rip_read(vcpu); ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : @@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) { struct kvm_run *kvm_run = vcpu->run; - /* - * rflags is the old, "raw" value of the flags. The new value has - * not been saved yet. - * - * This is correct even for TF set by the guest, because "the - * processor will not generate this exception after the instruction - * that sets the TF flag". - */ - if (unlikely(rflags & X86_EFLAGS_TF)) { - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | - DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - /* - * "Certain debug exceptions may clear bit 0-3. The - * remaining contents of the DR6 register are never - * cleared by the processor". - */ - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + } else { + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; + kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r = EMULATE_DONE; kvm_x86_ops->skip_emulated_instruction(vcpu); - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + + /* + * rflags is the old, "raw" value of the flags. The new value has + * not been saved yet. + * + * This is correct even for TF set by the guest, because "the + * processor will not generate this exception after the instruction + * that sets the TF flag". + */ + if (unlikely(rflags & X86_EFLAGS_TF)) + kvm_vcpu_do_singlestep(vcpu, &r); return r == EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -5726,8 +5727,9 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + if (r == EMULATE_DONE && + (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) __kvm_set_rflags(vcpu, ctxt->eflags); diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 003eeee3fbc6..30ee8c608853 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -213,8 +213,6 @@ struct mm_struct; #define release_segments(mm) do { } while(0) #define forget_segments() do { } while (0) -#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc) - extern unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) diff --git a/block/bio.c b/block/bio.c index 888e7801c638..26b0810fb8ea 100644 --- a/block/bio.c +++ b/block/bio.c @@ -240,20 +240,21 @@ fallback: return bvl; } -static void __bio_free(struct bio *bio) +void bio_uninit(struct bio *bio) { bio_disassociate_task(bio); if (bio_integrity(bio)) bio_integrity_free(bio); } +EXPORT_SYMBOL(bio_uninit); static void bio_free(struct bio *bio) { struct bio_set *bs = bio->bi_pool; void *p; - __bio_free(bio); + bio_uninit(bio); if (bs) { bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); @@ -271,6 +272,11 @@ static void bio_free(struct bio *bio) } } +/* + * Users of this function have their own bio allocation. Subsequently, + * they must remember to pair any call to bio_init() with bio_uninit() + * when IO has completed, or when the bio is released. + */ void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs) { @@ -297,7 +303,7 @@ void bio_reset(struct bio *bio) { unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); - __bio_free(bio); + bio_uninit(bio); memset(bio, 0, BIO_RESET_BYTES); bio->bi_flags = flags; diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1f5b692526ae..0ded5e846335 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, __blk_mq_sched_assign_ioc(q, rq, bio, ioc); } +/* + * Mark a hardware queue as needing a restart. For shared queues, maintain + * a count of how many hardware queues are marked for restart. + */ +static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + return; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_inc(&q->shared_hctx_restart); + } else + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); +} + +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + return false; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_dec(&q->shared_hctx_restart); + } else + clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + + if (blk_mq_hctx_has_pending(hctx)) { + blk_mq_run_hw_queue(hctx, true); + return true; + } + + return false; +} + struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, @@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) -{ - if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { - clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) { - blk_mq_run_hw_queue(hctx, true); - return true; - } - } - return false; -} - /** * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list * @pos: loop cursor. @@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) unsigned int i, j; if (set->flags & BLK_MQ_F_TAG_SHARED) { + /* + * If this is 0, then we know that no hardware queues + * have RESTART marked. We're done. + */ + if (!atomic_read(&queue->shared_hctx_restart)) + return; + rcu_read_lock(); list_for_each_entry_rcu_rr(q, queue, &set->tag_list, tag_set_list) { diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index edafb5383b7b..5007edece51a 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) return false; } -/* - * Mark a hardware queue as needing a restart. - */ -static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) -{ - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq.c b/block/blk-mq.c index bb66c96850b1..958cedaff8b8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q, } } +/* + * Caller needs to ensure that we're either frozen/quiesced, or that + * the queue isn't live yet. + */ static void queue_set_hctx_shared(struct request_queue *q, bool shared) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) + if (shared) { + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_inc(&q->shared_hctx_restart); hctx->flags |= BLK_MQ_F_TAG_SHARED; - else + } else { + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + atomic_dec(&q->shared_hctx_restart); hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + } } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) +static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, + bool shared) { struct request_queue *q; diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 8af664f7d27c..be117495eb43 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -877,7 +877,7 @@ static void aead_sock_destruct(struct sock *sk) unsigned int ivlen = crypto_aead_ivsize( crypto_aead_reqtfm(&ctx->aead_req)); - WARN_ON(atomic_read(&sk->sk_refcnt) != 0); + WARN_ON(refcount_read(&sk->sk_refcnt) != 0); aead_put_sgl(sk); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 3a10d7573477..d53162997f32 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev) adev->flags.coherent_dma = cca; } +static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data) +{ + bool *is_spi_i2c_slave_p = data; + + if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) + return 1; + + /* + * devices that are connected to UART still need to be enumerated to + * platform bus + */ + if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART) + *is_spi_i2c_slave_p = true; + + /* no need to do more checking */ + return -1; +} + +static bool acpi_is_spi_i2c_slave(struct acpi_device *device) +{ + struct list_head resource_list; + bool is_spi_i2c_slave = false; + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave, + &is_spi_i2c_slave); + acpi_dev_free_resource_list(&resource_list); + + return is_spi_i2c_slave; +} + void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, int type, unsigned long long sta) { @@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, acpi_bus_get_flags(device); device->flags.match_driver = false; device->flags.initialized = true; + device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device); acpi_device_clear_enumerated(device); device_initialize(&device->dev); dev_set_uevent_suppress(&device->dev, true); @@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, return AE_OK; } -static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data) -{ - bool *is_spi_i2c_slave_p = data; - - if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) - return 1; - - /* - * devices that are connected to UART still need to be enumerated to - * platform bus - */ - if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART) - *is_spi_i2c_slave_p = true; - - /* no need to do more checking */ - return -1; -} - static void acpi_default_enumeration(struct acpi_device *device) { - struct list_head resource_list; - bool is_spi_i2c_slave = false; - /* * Do not enumerate SPI/I2C slaves as they will be enumerated by their * respective parents. */ - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave, - &is_spi_i2c_slave); - acpi_dev_free_resource_list(&resource_list); - if (!is_spi_i2c_slave) { + if (!device->flags.spi_i2c_slave) { acpi_create_platform_device(device, NULL); acpi_device_set_enumerated(device); } else { @@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device) return; device->flags.match_driver = true; - if (ret > 0) { + if (ret > 0 && !device->flags.spi_i2c_slave) { acpi_device_set_enumerated(device); goto ok; } @@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device) if (ret < 0) return; - if (device->pnp.type.platform_id) - acpi_default_enumeration(device); - else + if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave) acpi_device_set_enumerated(device); + else + acpi_default_enumeration(device); ok: list_for_each_entry(child, &device->children, node) diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 7584ae1ded85..f0433adcd8fc 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e) else { dev_kfree_skb_any(entry->skb); } -#if 1 - /* race fixed by the above incarnation mechanism, but... */ - if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) { - atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0); - } -#endif + /* check error condition */ if (*entry->status & STATUS_ERROR) atomic_inc(&vcc->stats->tx_err); @@ -1130,13 +1125,9 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp return -ENOMEM; } - ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); - vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); - ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); - return 0; } @@ -1572,7 +1563,6 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned long flags; ASSERT(vcc); - ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); ASSERT(fore200e); ASSERT(fore200e_vcc); diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 461da2bce8ef..37ee21c5a5ca 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -2395,7 +2395,7 @@ he_close(struct atm_vcc *vcc) * TBRQ, the host issues the close command to the adapter. */ - while (((tx_inuse = atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) && + while (((tx_inuse = refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) && (retry < MAX_RETRY)) { msleep(sleep); if (sleep < 250) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 4e64de380bda..60bacba03d17 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -724,7 +724,7 @@ push_on_scq(struct idt77252_dev *card, struct vc_map *vc, struct sk_buff *skb) struct sock *sk = sk_atm(vcc); vc->estimator->cells += (skb->len + 47) / 48; - if (atomic_read(&sk->sk_wmem_alloc) > + if (refcount_read(&sk->sk_wmem_alloc) > (sk->sk_sndbuf >> 1)) { u32 cps = vc->estimator->maxcps; @@ -2009,7 +2009,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags) atomic_inc(&vcc->stats->tx_err); return -ENOMEM; } - atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); skb_put_data(skb, cell, 52); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 726c32e35db9..0e824091a12f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg) unsigned long timeout; int ret; - xen_blkif_get(blkif); - set_freezable(); while (!kthread_should_stop()) { if (try_to_freeze()) @@ -665,7 +663,6 @@ purge_gnt_list: print_stats(ring); ring->xenblkd = NULL; - xen_blkif_put(blkif); return 0; } @@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, static void make_response(struct xen_blkif_ring *ring, u64 id, unsigned short op, int st) { - struct blkif_response resp; + struct blkif_response *resp; unsigned long flags; union blkif_back_rings *blk_rings; int notify; - resp.id = id; - resp.operation = op; - resp.status = st; - spin_lock_irqsave(&ring->blk_ring_lock, flags); blk_rings = &ring->blk_rings; /* Place on the response ring for the relevant domain. */ switch (ring->blkif->blk_protocol) { case BLKIF_PROTOCOL_NATIVE: - memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), - &resp, sizeof(resp)); + resp = RING_GET_RESPONSE(&blk_rings->native, + blk_rings->native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: - memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), - &resp, sizeof(resp)); + resp = RING_GET_RESPONSE(&blk_rings->x86_32, + blk_rings->x86_32.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: - memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), - &resp, sizeof(resp)); + resp = RING_GET_RESPONSE(&blk_rings->x86_64, + blk_rings->x86_64.rsp_prod_pvt); break; default: BUG(); } + + resp->id = id; + resp->operation = op; + resp->status = st; + blk_rings->common.rsp_prod_pvt++; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); spin_unlock_irqrestore(&ring->blk_ring_lock, flags); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dea61f6ab8cb..ecb35fe8ca8d 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues; struct blkif_common_request { char dummy; }; -struct blkif_common_response { - char dummy; -}; + +/* i386 protocol version */ struct blkif_x86_32_request_rw { uint8_t nr_segments; /* number of segments */ @@ -129,14 +128,6 @@ struct blkif_x86_32_request { } u; } __attribute__((__packed__)); -/* i386 protocol version */ -#pragma pack(push, 4) -struct blkif_x86_32_response { - uint64_t id; /* copied from request */ - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ -}; -#pragma pack(pop) /* x86_64 protocol version */ struct blkif_x86_64_request_rw { @@ -193,18 +184,12 @@ struct blkif_x86_64_request { } u; } __attribute__((__packed__)); -struct blkif_x86_64_response { - uint64_t __attribute__((__aligned__(8))) id; - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ -}; - DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, - struct blkif_common_response); + struct blkif_response); DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, - struct blkif_x86_32_response); + struct blkif_response __packed); DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, - struct blkif_x86_64_response); + struct blkif_response); union blkif_back_rings { struct blkif_back_ring native; @@ -281,6 +266,7 @@ struct xen_blkif_ring { wait_queue_head_t wq; atomic_t inflight; + bool active; /* One thread per blkif ring. */ struct task_struct *xenblkd; unsigned int waiting_reqs; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1f3dfaa54d87..792da683e70d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif) init_waitqueue_head(&ring->shutdown_wq); ring->blkif = blkif; ring->st_print = jiffies; - xen_blkif_get(blkif); + ring->active = true; } return 0; @@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) struct xen_blkif_ring *ring = &blkif->rings[r]; unsigned int i = 0; + if (!ring->active) + continue; + if (ring->xenblkd) { kthread_stop(ring->xenblkd); wake_up(&ring->shutdown_wq); - ring->xenblkd = NULL; } /* The above kthread_stop() guarantees that at this point we @@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) BUG_ON(ring->free_pages_num != 0); BUG_ON(ring->persistent_gnt_c != 0); WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); - xen_blkif_put(blkif); + ring->active = false; } blkif->nr_ring_pages = 0; /* @@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif) { - - xen_blkif_disconnect(blkif); + WARN_ON(xen_blkif_disconnect(blkif)); xen_vbd_free(&blkif->vbd); + kfree(blkif->be->mode); + kfree(blkif->be); /* Make sure everything is drained before shutting down */ kmem_cache_free(xen_blkif_cachep, blkif); @@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) xen_blkif_put(be->blkif); } - kfree(be->mode); - kfree(be); return 0; } diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 24f8c4e93f4e..9ab6cfbb831d 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -295,6 +295,7 @@ static const struct { { 0x410e, "BCM43341B0" }, /* 002.001.014 */ { 0x4406, "BCM4324B3" }, /* 002.004.006 */ { 0x610c, "BCM4354" }, /* 003.001.012 */ + { 0x2209, "BCM43430A1" }, /* 001.002.009 */ { } }; diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index d2e9e2d1b014..6a662d0161b4 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -419,8 +419,7 @@ finalize: if (err) return err; - err = bcm_request_irq(bcm); - if (!err) + if (!bcm_request_irq(bcm)) err = bcm_setup_sleep(hu); return err; @@ -657,6 +656,15 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { }, .driver_data = &acpi_active_low, }, + { + .ident = "Asus T100CHI", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, + "ASUSTeK COMPUTER INC."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"), + }, + .driver_data = &acpi_active_low, + }, { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */ .ident = "Lenovo ThinkPad 8", .matches = { diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 7de0edc0ff8c..aea930101dd2 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -31,7 +31,7 @@ #include "hci_uart.h" -struct serdev_device_ops hci_serdev_client_ops; +static struct serdev_device_ops hci_serdev_client_ops; static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type) { @@ -268,7 +268,7 @@ static int hci_uart_receive_buf(struct serdev_device *serdev, const u8 *data, return count; } -struct serdev_device_ops hci_serdev_client_ops = { +static struct serdev_device_ops hci_serdev_client_ops = { .receive_buf = hci_uart_receive_buf, .write_wakeup = hci_uart_write_wakeup, }; diff --git a/drivers/char/random.c b/drivers/char/random.c index e870f329db88..01a260f67437 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp, size_t len) p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp; cp++; crng_init_cnt++; len--; } + spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { invalidate_batched_entropy(); crng_init = 1; wake_up_interruptible(&crng_init_wait); pr_notice("random: fast init done\n"); } - spin_unlock_irqrestore(&primary_crng.lock, flags); return 1; } @@ -841,6 +841,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } memzero_explicit(&buf, sizeof(buf)); crng->init_time = jiffies; + spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng == &primary_crng && crng_init < 2) { invalidate_batched_entropy(); crng_init = 2; @@ -848,7 +849,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) wake_up_interruptible(&crng_init_wait); pr_notice("random: crng init done\n"); } - spin_unlock_irqrestore(&primary_crng.lock, flags); } static inline void crng_wait_ready(void) @@ -2041,8 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); u64 get_random_u64(void) { u64 ret; - bool use_lock = crng_init < 2; - unsigned long flags; + bool use_lock = READ_ONCE(crng_init) < 2; + unsigned long flags = 0; struct batched_entropy *batch; #if BITS_PER_LONG == 64 @@ -2073,8 +2073,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); u32 get_random_u32(void) { u32 ret; - bool use_lock = crng_init < 2; - unsigned long flags; + bool use_lock = READ_ONCE(crng_init) < 2; + unsigned long flags = 0; struct batched_entropy *batch; if (arch_get_random_int(&ret)) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 4bed671e490e..8b5c30062d99 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame) return 0; } - rate = readl_relaxed(frame + CNTFRQ); + rate = readl_relaxed(base + CNTFRQ); - iounmap(frame); + iounmap(base); return rate; } diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index 44e5e951583b..8e64b8460f11 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -18,6 +18,7 @@ #include <linux/clk.h> #include <linux/interrupt.h> #include <linux/clockchips.h> +#include <linux/clocksource.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/slab.h> diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 2e9c830ae1cd..c4656c4d44a6 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -12,6 +12,7 @@ #include <linux/clk.h> #include <linux/clockchips.h> +#include <linux/clocksource.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 5104b6398139..c83ea68be792 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev, u32 set; if (!of_device_is_compatible(mvchip->chip.of_node, - "marvell,armada-370-xp-gpio")) + "marvell,armada-370-gpio")) return 0; if (IS_ERR(mvchip->clk)) @@ -852,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = { .data = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP, }, { - .compatible = "marvell,armada-370-xp-gpio", + .compatible = "marvell,armada-370-gpio", .data = (void *) MVEBU_GPIO_SOC_VARIANT_ORION, }, { @@ -1128,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip); } - /* Armada 370/XP has simple PWM support for GPIO lines */ + /* Some MVEBU SoCs have simple PWM support for GPIO lines */ if (IS_ENABLED(CONFIG_PWM)) return mvebu_pwm_probe(pdev, mvchip, id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1cf78f4dd339..1e8e1123ddf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n", adev->clock.default_dispclk / 100); adev->clock.default_dispclk = 60000; + } else if (adev->clock.default_dispclk <= 60000) { + DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n", + adev->clock.default_dispclk / 100); + adev->clock.default_dispclk = 62500; } adev->clock.dp_extclk = le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2d705e6a75a..ab6b0d0febab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -449,6 +449,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 8c9bc75a9c2d..8a0818b23ea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); - ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; + ENABLE_DISP_POWER_GATING_PS_ALLOCATION args; memset(&args, 0, sizeof(args)); @@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) { int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); - ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; + ENABLE_DISP_POWER_GATING_PS_ALLOCATION args; memset(&args, 0, sizeof(args)); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 9f847615ac74..48ca2457df8c 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if (!connector) return -ENOENT; - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - encoder = drm_connector_get_encoder(connector); - if (encoder) - out_resp->encoder_id = encoder->base.id; - else - out_resp->encoder_id = 0; - - ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic, - (uint32_t __user *)(unsigned long)(out_resp->props_ptr), - (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr), - &out_resp->count_props); - drm_modeset_unlock(&dev->mode_config.connection_mutex); - if (ret) - goto out_unref; - for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) if (connector->encoder_ids[i] != 0) encoders_count++; @@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if (put_user(connector->encoder_ids[i], encoder_ptr + copied)) { ret = -EFAULT; - goto out_unref; + goto out; } copied++; } @@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if (copy_to_user(mode_ptr + copied, &u_mode, sizeof(u_mode))) { ret = -EFAULT; + mutex_unlock(&dev->mode_config.mutex); + goto out; } copied++; } } out_resp->count_modes = mode_count; -out: mutex_unlock(&dev->mode_config.mutex); -out_unref: + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + encoder = drm_connector_get_encoder(connector); + if (encoder) + out_resp->encoder_id = encoder->base.id; + else + out_resp->encoder_id = 0; + + /* Only grab properties after probing, to make sure EDID and other + * properties reflect the latest status. */ + ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic, + (uint32_t __user *)(unsigned long)(out_resp->props_ptr), + (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr), + &out_resp->count_props); + drm_modeset_unlock(&dev->mode_config.connection_mutex); + +out: drm_connector_put(connector); return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index c4a091e87426..e437fba1209d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -106,9 +106,10 @@ struct etnaviv_gem_submit { struct etnaviv_gpu *gpu; struct ww_acquire_ctx ticket; struct dma_fence *fence; + u32 flags; unsigned int nr_bos; struct etnaviv_gem_submit_bo bos[0]; - u32 flags; + /* No new members here, the previous one is variable-length! */ }; int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index de80ee1b71df..1013765274da 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -172,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE; - bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT); + bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT); ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write, explicit); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d689e511744e..4bd1467c17b1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data) struct file_stats *stats = data; struct i915_vma *vma; + lockdep_assert_held(&obj->base.dev->struct_mutex); + stats->count++; stats->total += obj->base.size; if (!obj->bind_count) @@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct drm_i915_gem_request *request; struct task_struct *task; + mutex_lock(&dev->struct_mutex); + memset(&stats, 0, sizeof(stats)); stats.file_priv = file->driver_priv; spin_lock(&file->table_lock); @@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data) * still alive (e.g. get_pid(current) => fork() => exit()). * Therefore, we need to protect this ->comm access using RCU. */ - mutex_lock(&dev->struct_mutex); request = list_first_entry_or_null(&file_priv->mm.request_list, struct drm_i915_gem_request, client_link); @@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) PIDTYPE_PID); print_file_stats(m, task ? task->comm : "<unknown>", stats); rcu_read_unlock(); + mutex_unlock(&dev->struct_mutex); } mutex_unlock(&dev->filelist_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 462031cbd77f..615f0a855222 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment; + gfp_t noreclaim; int ret; - gfp_t gfp; /* Assert that the object is not currently in any GPU domain. As it * wasn't in the GTT, there shouldn't be any way it could have been in @@ -2315,22 +2315,31 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; - gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); - gfp |= __GFP_NORETRY | __GFP_NOWARN; + noreclaim = mapping_gfp_constraint(mapping, + ~(__GFP_IO | __GFP_RECLAIM)); + noreclaim |= __GFP_NORETRY | __GFP_NOWARN; + sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { - page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (unlikely(IS_ERR(page))) { - i915_gem_shrink(dev_priv, - page_count, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + const unsigned int shrink[] = { + I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, + 0, + }, *s = shrink; + gfp_t gfp = noreclaim; + + do { page = shmem_read_mapping_page_gfp(mapping, i, gfp); - } - if (unlikely(IS_ERR(page))) { - gfp_t reclaim; + if (likely(!IS_ERR(page))) + break; + + if (!*s) { + ret = PTR_ERR(page); + goto err_sg; + } + + i915_gem_shrink(dev_priv, 2 * page_count, *s++); + cond_resched(); /* We've tried hard to allocate the memory by reaping * our own buffer, now let the real VM do its job and @@ -2340,15 +2349,26 @@ rebuild_st: * defer the oom here by reporting the ENOMEM back * to userspace. */ - reclaim = mapping_gfp_mask(mapping); - reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ - - page = shmem_read_mapping_page_gfp(mapping, i, reclaim); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto err_sg; + if (!*s) { + /* reclaim and warn, but no oom */ + gfp = mapping_gfp_mask(mapping); + + /* Our bo are always dirty and so we require + * kswapd to reclaim our pages (direct reclaim + * does not effectively begin pageout of our + * buffers on its own). However, direct reclaim + * only waits for kswapd when under allocation + * congestion. So as a result __GFP_RECLAIM is + * unreliable and fails to actually reclaim our + * dirty pages -- unless you try over and over + * again with !__GFP_NORETRY. However, we still + * want to fail this allocation rather than + * trigger the out-of-memory killer and for + * this we want the future __GFP_MAYFAIL. + */ } - } + } while (1); + if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { @@ -4222,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) mapping = obj->base.filp->f_mapping; mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); i915_gem_object_init(obj, &i915_gem_object_ops); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3e59c8ef27b..9ad13eeed904 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -546,11 +546,12 @@ repeat: } static int -i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, +i915_gem_execbuffer_relocate_entry(struct i915_vma *vma, struct eb_vmas *eb, struct drm_i915_gem_relocation_entry *reloc, struct reloc_cache *cache) { + struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; @@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EINVAL; } + /* + * If we write into the object, we need to force the synchronisation + * barrier, either with an asynchronous clflush or if we executed the + * patching using the GPU (though that should be serialised by the + * timeline). To be completely sure, and since we are required to + * do relocations we are already stalling, disable the user's opt + * of our synchronisation. + */ + vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; + ret = relocate_entry(obj, reloc, cache, target_offset); if (ret) return ret; @@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); + ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache); if (ret) goto out; @@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); + ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache); if (ret) break; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 5ddbc9499775..a74d0ac737cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GPU processing the request, we never over-estimate the * position of the head. */ - req->head = req->ring->tail; + req->head = req->ring->emit; /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1642fff9cf13..ab5140ba108d 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, GEM_BUG_ON(freespace < wqi_size); /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = rq->tail; - assert_ring_tail_valid(rq->ring, rq->tail); - tail >>= 3; + tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 1aba47024656..f066e2d785f5 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma) break; } + if (!ret) { + ret = i915_gem_active_retire(&vma->last_fence, + &vma->vm->i915->drm.struct_mutex); + } + __i915_vma_unpin(vma); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 96b0b01677e2..9106ea32b048 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc, static void skylake_pfit_enable(struct intel_crtc *crtc); static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); -static void intel_modeset_setup_hw_state(struct drm_device *dev); +static void intel_modeset_setup_hw_state(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); struct intel_limit { @@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev, struct drm_crtc *crtc; int i, ret; - intel_modeset_setup_hw_state(dev); + intel_modeset_setup_hw_state(dev, ctx); i915_redisable_vga(to_i915(dev)); if (!state) @@ -5825,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_update_watermarks(intel_crtc); } -static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) +static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) { struct intel_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); @@ -5855,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) return; } - state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; + state->acquire_ctx = ctx; /* Everything's already locked, -EDEADLK can't happen. */ crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); @@ -15030,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev) intel_setup_outputs(dev_priv); drm_modeset_lock_all(dev); - intel_modeset_setup_hw_state(dev); + intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx); drm_modeset_unlock_all(dev); for_each_intel_crtc(dev, crtc) { @@ -15067,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev) return 0; } -static void intel_enable_pipe_a(struct drm_device *dev) +static void intel_enable_pipe_a(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_connector *crt = NULL; struct intel_load_detect_pipe load_detect_temp; - struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx; int ret; /* We can't just switch on the pipe A, we need to set things up with a @@ -15145,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv, (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); } -static void intel_sanitize_crtc(struct intel_crtc *crtc) +static void intel_sanitize_crtc(struct intel_crtc *crtc, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -15191,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) plane = crtc->plane; crtc->base.primary->state->visible = true; crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base); + intel_crtc_disable_noatomic(&crtc->base, ctx); crtc->plane = plane; } @@ -15201,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * resume. Force-enable the pipe to fix this, the update_dpms * call below we restore the pipe to the right state, but leave * the required bits on. */ - intel_enable_pipe_a(dev); + intel_enable_pipe_a(dev, ctx); } /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) - intel_crtc_disable_noatomic(&crtc->base); + intel_crtc_disable_noatomic(&crtc->base, ctx); if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) { /* @@ -15505,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) * and sanitizes it to the current state */ static void -intel_modeset_setup_hw_state(struct drm_device *dev) +intel_modeset_setup_hw_state(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) { struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; @@ -15525,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - intel_sanitize_crtc(crtc); + intel_sanitize_crtc(crtc, ctx); intel_dump_pipe_config(crtc, crtc->config, "[setup_hw_state]"); } diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 6532e226db29..40ba3134545e 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); struct intel_panel *panel = &connector->panel; - intel_dp_aux_enable_backlight(connector); - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) panel->backlight.max = 0xFFFF; else diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dac4e003c1f3..62f44d3e7c43 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; - assert_ring_tail_valid(rq->ring, rq->tail); - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = ce->ring->tail = 0; - intel_ring_update_space(ce->ring); + intel_ring_reset(ce->ring, 0); } } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 66a2b8b83972..513a0f4b469b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size) void intel_ring_update_space(struct intel_ring *ring) { - ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); + ring->space = __intel_ring_space(ring->head, ring->emit, ring->size); } static int @@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); - assert_ring_tail_valid(request->ring, request->tail); - I915_WRITE_TAIL(request->engine, request->tail); + I915_WRITE_TAIL(request->engine, + intel_ring_set_tail(request->ring, request->tail)); } static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) @@ -1316,11 +1316,23 @@ err: return PTR_ERR(addr); } +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + GEM_BUG_ON(!list_empty(&ring->request_list)); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + void intel_ring_unpin(struct intel_ring *ring) { GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->tail); + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else @@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + /* Restart from the beginning of the rings for convenience */ for_each_engine(engine, dev_priv, id) - engine->buffer->head = engine->buffer->tail; + intel_ring_reset(engine->buffer, 0); } static int ring_request_alloc(struct drm_i915_gem_request *request) @@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) unsigned space; /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ring->tail, + space = __intel_ring_space(target->postfix, ring->emit, ring->size); if (space >= bytes) break; @@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; - int remain_actual = ring->size - ring->tail; - int remain_usable = ring->effective_size - ring->tail; + int remain_actual = ring->size - ring->emit; + int remain_usable = ring->effective_size - ring->emit; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (unlikely(need_wrap)) { GEM_BUG_ON(remain_actual > ring->space); - GEM_BUG_ON(ring->tail + remain_actual > ring->size); + GEM_BUG_ON(ring->emit + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->tail, 0, remain_actual); - ring->tail = 0; + memset(ring->vaddr + ring->emit, 0, remain_actual); + ring->emit = 0; ring->space -= remain_actual; } - GEM_BUG_ON(ring->tail > ring->size - bytes); - cs = ring->vaddr + ring->tail; - ring->tail += bytes; + GEM_BUG_ON(ring->emit > ring->size - bytes); + cs = ring->vaddr + ring->emit; + ring->emit += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); @@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { int num_dwords = - (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); u32 *cs; if (num_dwords == 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a82a0807f64d..f7144fe09613 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -145,6 +145,7 @@ struct intel_ring { u32 head; u32 tail; + u32 emit; int space; int size; @@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +void intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); @@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); + GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); } static inline u32 @@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) GEM_BUG_ON(tail >= ring->size); } -void intel_ring_update_space(struct intel_ring *ring); +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly order, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_gem_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 432480ff9d22..3178ba0c537c 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev) rdev->pdev->subsystem_vendor == 0x103c && rdev->pdev->subsystem_device == 0x280a) return; + /* quirk for rs4xx Toshiba Sattellite L20-183 latop to make it resume + * - it hangs on resume inside the dynclk 1 table. + */ + if (rdev->family == CHIP_RS400 && + rdev->pdev->subsystem_vendor == 0x1179 && + rdev->pdev->subsystem_device == 0xff31) + return; /* DYN CLK 1 */ table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 6ecf42783d4b..0a6444d72000 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = { * https://bugzilla.kernel.org/show_bug.cgi?id=51381 */ { PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX }, + /* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU + * https://bugs.freedesktop.org/show_bug.cgi?id=101491 + */ + { PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX }, /* macbook pro 8.2 */ { PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP }, { 0, 0, 0, 0, 0 }, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 13db8a2851ed..1f013d45c9e9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man) list_for_each_entry_safe(entry, next, &man->list, head) vmw_cmdbuf_res_free(man, entry); + drm_ht_remove(&man->resources); kfree(man); } diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 95ed17183e73..54a47b40546f 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx, * the first read operation, otherwise the first read cost * one extra clock cycle. */ - temp = readb(i2c_imx->base + IMX_I2C_I2CR); + temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp |= I2CR_MTX; - writeb(temp, i2c_imx->base + IMX_I2C_I2CR); + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); } msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); @@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo * the first read operation, otherwise the first read cost * one extra clock cycle. */ - temp = readb(i2c_imx->base + IMX_I2C_I2CR); + temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp |= I2CR_MTX; - writeb(temp, i2c_imx->base + IMX_I2C_I2CR); + imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); } } else if (i == (msgs->len - 2)) { dev_dbg(&i2c_imx->adapter.dev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9f7e18612322..dc2f59e33971 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -223,8 +223,8 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static void mlx5_query_port_roce(struct ib_device *device, u8 port_num, - struct ib_port_attr *props) +static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, + struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; @@ -232,12 +232,14 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num, enum ib_mtu ndev_ib_mtu; u16 qkey_viol_cntr; u32 eth_prot_oper; + int err; /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. */ - if (mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, port_num)) - return; + err = mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, port_num); + if (err) + return err; translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); @@ -258,7 +260,7 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num, ndev = mlx5_ib_get_netdev(device, port_num); if (!ndev) - return; + return 0; if (mlx5_lag_is_active(dev->mdev)) { rcu_read_lock(); @@ -281,75 +283,49 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num, dev_put(ndev); props->active_mtu = min(props->max_mtu, ndev_ib_mtu); + return 0; } -static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void *mlx5_addr) +static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr) { -#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) - char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, - source_l3_address); - void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, - source_mac_47_32); - - if (!gid) - return; + enum ib_gid_type gid_type = IB_GID_TYPE_IB; + u8 roce_version = 0; + u8 roce_l3_type = 0; + bool vlan = false; + u8 mac[ETH_ALEN]; + u16 vlan_id = 0; - ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr); + if (gid) { + gid_type = attr->gid_type; + ether_addr_copy(mac, attr->ndev->dev_addr); - if (is_vlan_dev(attr->ndev)) { - MLX5_SET_RA(mlx5_addr, vlan_valid, 1); - MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev)); + if (is_vlan_dev(attr->ndev)) { + vlan = true; + vlan_id = vlan_dev_vlan_id(attr->ndev); + } } - switch (attr->gid_type) { + switch (gid_type) { case IB_GID_TYPE_IB: - MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); + roce_version = MLX5_ROCE_VERSION_1; break; case IB_GID_TYPE_ROCE_UDP_ENCAP: - MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); + roce_version = MLX5_ROCE_VERSION_2; + if (ipv6_addr_v4mapped((void *)gid)) + roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4; + else + roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6; break; default: - WARN_ON(true); + mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type); } - if (attr->gid_type != IB_GID_TYPE_IB) { - if (ipv6_addr_v4mapped((void *)gid)) - MLX5_SET_RA(mlx5_addr, roce_l3_type, - MLX5_ROCE_L3_TYPE_IPV4); - else - MLX5_SET_RA(mlx5_addr, roce_l3_type, - MLX5_ROCE_L3_TYPE_IPV6); - } - - if ((attr->gid_type == IB_GID_TYPE_IB) || - !ipv6_addr_v4mapped((void *)gid)) - memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); - else - memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4); -} - -static int set_roce_addr(struct ib_device *device, u8 port_num, - unsigned int index, - const union ib_gid *gid, - const struct ib_gid_attr *attr) -{ - struct mlx5_ib_dev *dev = to_mdev(device); - u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; - void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); - enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); - - if (ll != IB_LINK_LAYER_ETHERNET) - return -EINVAL; - - ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); - - MLX5_SET(set_roce_address_in, in, roce_address_index, index); - MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, + roce_l3_type, gid->raw, mac, vlan, + vlan_id); } static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, @@ -357,13 +333,13 @@ static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, const struct ib_gid_attr *attr, __always_unused void **context) { - return set_roce_addr(device, port_num, index, gid, attr); + return set_roce_addr(to_mdev(device), port_num, index, gid, attr); } static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, unsigned int index, __always_unused void **context) { - return set_roce_addr(device, port_num, index, NULL, NULL); + return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL); } __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, @@ -978,20 +954,31 @@ out: int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + unsigned int count; + int ret; + switch (mlx5_get_vport_access_method(ibdev)) { case MLX5_VPORT_ACCESS_METHOD_MAD: - return mlx5_query_mad_ifc_port(ibdev, port, props); + ret = mlx5_query_mad_ifc_port(ibdev, port, props); + break; case MLX5_VPORT_ACCESS_METHOD_HCA: - return mlx5_query_hca_port(ibdev, port, props); + ret = mlx5_query_hca_port(ibdev, port, props); + break; case MLX5_VPORT_ACCESS_METHOD_NIC: - mlx5_query_port_roce(ibdev, port, props); - return 0; + ret = mlx5_query_port_roce(ibdev, port, props); + break; default: - return -EINVAL; + ret = -EINVAL; + } + + if (!ret && props) { + count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev); + props->gid_tbl_len -= count; } + return ret; } static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 30b256a2c54e..de4025deaa4a 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -742,7 +742,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (type == NES_TIMER_TYPE_SEND) { new_send->seq_num = ntohl(tcp_hdr(skb)->seq); - atomic_inc(&new_send->skb->users); + refcount_inc(&new_send->skb->users); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); cm_node->send_entry = new_send; add_ref_cm_node(cm_node); @@ -924,7 +924,7 @@ static void nes_cm_timer_tick(unsigned long pass) flags); break; } - atomic_inc(&send_entry->skb->users); + refcount_inc(&send_entry->skb->users); cm_packets_retrans++; nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " "for node %p, jiffies = %lu, time to send = " diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index e37d37273182..f600f3a7a3c6 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev) if (!btns_desc) { dev_err(dev, "ACPI Button Descriptors not found\n"); - return ERR_PTR(-ENODEV); + button_info = ERR_PTR(-ENODEV); + goto out; } /* The first package describes the collection */ @@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev) } if (collection_uid == -1) { dev_err(dev, "Invalid Button Collection Descriptor\n"); - return ERR_PTR(-ENODEV); + button_info = ERR_PTR(-ENODEV); + goto out; } /* There are package.count - 1 buttons + 1 terminating empty entry */ button_info = devm_kcalloc(dev, btns_desc->package.count, sizeof(*button_info), GFP_KERNEL); - if (!button_info) - return ERR_PTR(-ENOMEM); + if (!button_info) { + button_info = ERR_PTR(-ENOMEM); + goto out; + } /* Parse the button descriptors */ for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) { if (soc_button_parse_btn_desc(dev, &btns_desc->package.elements[i], collection_uid, - &button_info[btn])) - return ERR_PTR(-ENODEV); + &button_info[btn])) { + button_info = ERR_PTR(-ENODEV); + goto out; + } } +out: + kfree(buf.pointer); return button_info; } diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index dea63e2db3e6..f5206e2c767e 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -31,9 +31,6 @@ #define F54_GET_REPORT 1 #define F54_FORCE_CAL 2 -/* Fixed sizes of reports */ -#define F54_QUERY_LEN 27 - /* F54 capabilities */ #define F54_CAP_BASELINE (1 << 2) #define F54_CAP_IMAGE8 (1 << 3) @@ -95,7 +92,6 @@ struct rmi_f54_reports { struct f54_data { struct rmi_function *fn; - u8 qry[F54_QUERY_LEN]; u8 num_rx_electrodes; u8 num_tx_electrodes; u8 capabilities; @@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn) { int error; struct f54_data *f54; + u8 buf[6]; f54 = dev_get_drvdata(&fn->dev); error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, - &f54->qry, sizeof(f54->qry)); + buf, sizeof(buf)); if (error) { dev_err(&fn->dev, "%s: Failed to query F54 properties\n", __func__); return error; } - f54->num_rx_electrodes = f54->qry[0]; - f54->num_tx_electrodes = f54->qry[1]; - f54->capabilities = f54->qry[2]; - f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8); - f54->family = f54->qry[5]; + f54->num_rx_electrodes = buf[0]; + f54->num_tx_electrodes = buf[1]; + f54->capabilities = buf[2]; + f54->clock_rate = buf[3] | (buf[4] << 8); + f54->family = buf[5]; rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n", f54->num_rx_electrodes); diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 09720d950686..f932a83b4990 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"), }, }, + { + /* Fujitsu UH554 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"), + }, + }, { } }; diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index eb7fbe159963..929f8558bf1c 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe) } #ifdef CONFIG_CLKSRC_MIPS_GIC -u64 gic_read_count(void) +u64 notrace gic_read_count(void) { unsigned int hi, hi2, lo; @@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void) return bits; } -void gic_write_compare(u64 cnt) +void notrace gic_write_compare(u64 cnt) { if (mips_cm_is64) { gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt); @@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt) } } -void gic_write_cpu_compare(u64 cnt, int cpu) +void notrace gic_write_cpu_compare(u64 cnt, int cpu) { unsigned long flags; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 99e5f9751e8b..c5603d1a07d6 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -155,7 +155,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = skb->len + MISDN_HEADER_LEN; if (len < copied) { if (flags & MSG_PEEK) - atomic_dec(&skb->users); + refcount_dec(&skb->users); else skb_queue_head(&sk->sk_receive_queue, skb); return -ENOSPC; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 7910bfe50da4..93b181088168 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1105,10 +1105,13 @@ static void schedule_autocommit(struct dm_integrity_c *ic) static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio) { struct bio *bio; - spin_lock_irq(&ic->endio_wait.lock); + unsigned long flags; + + spin_lock_irqsave(&ic->endio_wait.lock, flags); bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); bio_list_add(&ic->flush_bio_list, bio); - spin_unlock_irq(&ic->endio_wait.lock); + spin_unlock_irqrestore(&ic->endio_wait.lock, flags); + queue_work(ic->commit_wq, &ic->commit_work); } @@ -3040,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "The device is too small"; goto bad; } + if (ti->len > ic->provided_data_sectors) { + r = -EINVAL; + ti->error = "Not enough provided sectors for requested mapping size"; + goto bad; + } if (!buffer_sectors) buffer_sectors = 1; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 3702e502466d..8d5ca30f6551 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -317,8 +317,8 @@ static void do_region(int op, int op_flags, unsigned region, else if (op == REQ_OP_WRITE_SAME) special_cmd_max_sectors = q->limits.max_write_same_sectors; if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || - op == REQ_OP_WRITE_SAME) && - special_cmd_max_sectors == 0) { + op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { + atomic_inc(&io->count); dec_count(io, region, -EOPNOTSUPP); return; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 7d893228c40f..b4b75dad816a 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1927,7 +1927,7 @@ struct dm_raid_superblock { /******************************************************************** * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!! * - * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist + * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist */ __le32 flags; /* Flags defining array states for reshaping */ @@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->layout = cpu_to_le32(mddev->layout); sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); + /******************************************************************** + * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!! + * + * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist + */ sb->new_level = cpu_to_le32(mddev->new_level); sb->new_layout = cpu_to_le32(mddev->new_layout); sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors); @@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; if (!test_and_clear_bit(FirstUse, &rdev->flags)) { - /* Retrieve device size stored in superblock to be prepared for shrink */ - rdev->sectors = le64_to_cpu(sb->sectors); + /* + * Retrieve rdev size stored in superblock to be prepared for shrink. + * Check extended superblock members are present otherwise the size + * will not be set! + */ + if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) + rdev->sectors = le64_to_cpu(sb->sectors); + rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); if (rdev->recovery_offset == MaxSector) set_bit(In_sync, &rdev->flags); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index e61c45047c25..4da8858856fb 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; region_t write_region; }; @@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) struct dm_raid1_bio_record *bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + bio_record->details.bi_bdev = NULL; + if (rw == WRITE) { /* Save region for mirror_end_io() handler */ bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); @@ -1256,12 +1259,22 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) } if (error == -EOPNOTSUPP) - return error; + goto out; if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) - return error; + goto out; if (unlikely(error)) { + if (!bio_record->details.bi_bdev) { + /* + * There wasn't enough memory to record necessary + * information for a retry or there was no other + * mirror in-sync. + */ + DMERR_LIMIT("Mirror read failed."); + return -EIO; + } + m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", @@ -1277,6 +1290,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) bd = &bio_record->details; dm_bio_restore(bd, bio); + bio_record->details.bi_bdev = NULL; bio->bi_error = 0; queue_bio(ms, bio, rw); @@ -1285,6 +1299,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) DMERR("All replicated volumes dead, failing I/O"); } +out: + bio_record->details.bi_bdev = NULL; + return error; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 17ad50daed08..28808e5ec0fd 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1094,6 +1094,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) return; } + /* + * Increment the unmapped blocks. This prevents a race between the + * passdown io and reallocation of freed blocks. + */ + r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end); + if (r) { + metadata_operation_failed(pool, "dm_pool_inc_data_range", r); + bio_io_error(m->bio); + cell_defer_no_holder(tc, m->cell); + mempool_free(m, pool->mapping_pool); + return; + } + discard_parent = bio_alloc(GFP_NOIO, 1); if (!discard_parent) { DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.", @@ -1114,19 +1127,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) end_discard(&op, r); } } - - /* - * Increment the unmapped blocks. This prevents a race between the - * passdown io and reallocation of freed blocks. - */ - r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end); - if (r) { - metadata_operation_failed(pool, "dm_pool_inc_data_range", r); - bio_io_error(m->bio); - cell_defer_no_holder(tc, m->cell); - mempool_free(m, pool->mapping_pool); - return; - } } static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index 75488e65cd96..8d46e3ad9529 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona, int ret; ret = regmap_read_poll_timeout(arizona->regmap, - ARIZONA_INTERRUPT_RAW_STATUS_5, val, - ((val & mask) == target), + reg, val, ((val & mask) == target), ARIZONA_REG_POLL_DELAY_US, timeout_ms * 1000); if (ret) diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 20bfb9ba83ea..cbb4f8566bbe 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -269,6 +269,10 @@ struct arcnet_local { struct timer_list timer; + struct net_device *dev; + int reply_status; + struct tasklet_struct reply_tasklet; + /* * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of * which can be used for either sending or receiving. The new dynamic diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 62ee439d5882..fcfccbb3d9a2 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -51,6 +51,7 @@ #include <net/arp.h> #include <linux/init.h> #include <linux/jiffies.h> +#include <linux/errqueue.h> #include <linux/leds.h> @@ -391,6 +392,52 @@ static void arcnet_timer(unsigned long data) } } +static void arcnet_reply_tasklet(unsigned long data) +{ + struct arcnet_local *lp = (struct arcnet_local *)data; + + struct sk_buff *ackskb, *skb; + struct sock_exterr_skb *serr; + struct sock *sk; + int ret; + + local_irq_disable(); + skb = lp->outgoing.skb; + if (!skb || !skb->sk) { + local_irq_enable(); + return; + } + + sock_hold(skb->sk); + sk = skb->sk; + ackskb = skb_clone_sk(skb); + sock_put(skb->sk); + + if (!ackskb) { + local_irq_enable(); + return; + } + + serr = SKB_EXT_ERR(ackskb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; + serr->ee.ee_data = skb_shinfo(skb)->tskey; + serr->ee.ee_info = lp->reply_status; + + /* finally erasing outgoing skb */ + dev_kfree_skb(lp->outgoing.skb); + lp->outgoing.skb = NULL; + + ackskb->dev = lp->dev; + + ret = sock_queue_err_skb(sk, ackskb); + if (ret) + kfree_skb(ackskb); + + local_irq_enable(); +}; + struct net_device *alloc_arcdev(const char *name) { struct net_device *dev; @@ -401,6 +448,7 @@ struct net_device *alloc_arcdev(const char *name) if (dev) { struct arcnet_local *lp = netdev_priv(dev); + lp->dev = dev; spin_lock_init(&lp->lock); init_timer(&lp->timer); lp->timer.data = (unsigned long) dev; @@ -436,6 +484,9 @@ int arcnet_open(struct net_device *dev) arc_cont(D_PROTO, "\n"); } + tasklet_init(&lp->reply_tasklet, arcnet_reply_tasklet, + (unsigned long)lp); + arc_printk(D_INIT, dev, "arcnet_open: resetting card.\n"); /* try to put the card in a defined state - if it fails the first @@ -527,6 +578,8 @@ int arcnet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); + tasklet_kill(&lp->reply_tasklet); + /* flush TX and disable RX */ lp->hw.intmask(dev, 0); lp->hw.command(dev, NOTXcmd); /* stop transmit */ @@ -635,13 +688,13 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb, txbuf = -1; if (txbuf != -1) { + lp->outgoing.skb = skb; if (proto->prepare_tx(dev, pkt, skb->len, txbuf) && !proto->ack_tx) { /* done right away and we don't want to acknowledge * the package later - forget about it now */ dev->stats.tx_bytes += skb->len; - dev_kfree_skb(skb); } else { /* do it the 'split' way */ lp->outgoing.proto = proto; @@ -756,6 +809,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct arcnet_local *lp; int recbuf, status, diagstatus, didsomething, boguscount; + unsigned long flags; int retval = IRQ_NONE; arc_printk(D_DURING, dev, "\n"); @@ -765,7 +819,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) lp = netdev_priv(dev); BUG_ON(!lp); - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); /* RESET flag was enabled - if device is not running, we must * clear it right away (but nothing else). @@ -774,7 +828,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (lp->hw.status(dev) & RESETflag) lp->hw.command(dev, CFLAGScmd | RESETclear); lp->hw.intmask(dev, 0); - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return retval; } @@ -842,8 +896,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) /* a transmit finished, and we're interested in it. */ if ((status & lp->intmask & TXFREEflag) || lp->timed_out) { + int ackstatus; lp->intmask &= ~(TXFREEflag | EXCNAKflag); + if (status & TXACKflag) + ackstatus = 2; + else if (lp->excnak_pending) + ackstatus = 1; + else + ackstatus = 0; + arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n", status); @@ -866,18 +928,11 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (lp->outgoing.proto && lp->outgoing.proto->ack_tx) { - int ackstatus; - - if (status & TXACKflag) - ackstatus = 2; - else if (lp->excnak_pending) - ackstatus = 1; - else - ackstatus = 0; - lp->outgoing.proto ->ack_tx(dev, ackstatus); } + lp->reply_status = ackstatus; + tasklet_hi_schedule(&lp->reply_tasklet); } if (lp->cur_tx != -1) release_arcbuf(dev, lp->cur_tx); @@ -998,7 +1053,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) udelay(1); lp->hw.intmask(dev, lp->intmask); - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return retval; } EXPORT_SYMBOL(arcnet_interrupt); diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index a80f4eb9262d..b780be6f41ff 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked) ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */ ackpkt->soft.cap.mes.ack = acked; - arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n", + arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n", *((int *)&ackpkt->soft.cap.cookie[0])); ackskb->protocol = cpu_to_be16(ETH_P_ARCNET); diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 239de38fbd6a..2d956cb59d06 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -93,6 +93,27 @@ static void led_recon_set(struct led_classdev *led_cdev, outb(!!value, priv->misc + ci->leds[card->index].red); } +static ssize_t backplane_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *net_dev = to_net_dev(dev); + struct arcnet_local *lp = netdev_priv(net_dev); + + return sprintf(buf, "%s\n", lp->backplane ? "true" : "false"); +} +static DEVICE_ATTR_RO(backplane_mode); + +static struct attribute *com20020_state_attrs[] = { + &dev_attr_backplane_mode.attr, + NULL, +}; + +static struct attribute_group com20020_state_group = { + .name = NULL, + .attrs = com20020_state_attrs, +}; + static void com20020pci_remove(struct pci_dev *pdev); static int com20020pci_probe(struct pci_dev *pdev, @@ -135,6 +156,7 @@ static int com20020pci_probe(struct pci_dev *pdev, for (i = 0; i < ci->devcount; i++) { struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i]; struct com20020_dev *card; + int dev_id_mask = 0xf; dev = alloc_arcdev(device); if (!dev) { @@ -166,8 +188,10 @@ static int com20020pci_probe(struct pci_dev *pdev, arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND); arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT); + SET_NETDEV_DEV(dev, &pdev->dev); dev->base_addr = ioaddr; dev->dev_addr[0] = node; + dev->sysfs_groups[0] = &com20020_state_group; dev->irq = pdev->irq; lp->card_name = "PCI COM20020"; lp->card_flags = ci->flags; @@ -177,10 +201,15 @@ static int com20020pci_probe(struct pci_dev *pdev, lp->timeout = timeout; lp->hw.owner = THIS_MODULE; + lp->backplane = (inb(priv->misc) >> (2 + i)) & 0x1; + + if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15)) + lp->backplane = 1; + /* Get the dev_id from the PLX rotary coder */ if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) - dev->dev_id = 0xc; - dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4; + dev_id_mask = 0x3; + dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); @@ -361,6 +390,31 @@ static struct com20020_pci_card_info card_info_eae_ma1 = { .flags = ARC_CAN_10MBIT, }; +static struct com20020_pci_card_info card_info_eae_fb2 = { + .name = "EAE PLX-PCI FB2", + .devcount = 1, + .chan_map_tbl = { + { + .bar = 2, + .offset = 0x00, + .size = 0x08, + }, + }, + .misc_map = { + .bar = 2, + .offset = 0x10, + .size = 0x04, + }, + .leds = { + { + .green = 0x0, + .red = 0x1, + }, + }, + .rotary = 0x0, + .flags = ARC_CAN_10MBIT, +}; + static const struct pci_device_id com20020pci_id_table[] = { { 0x1571, 0xa001, @@ -507,6 +561,12 @@ static const struct pci_device_id com20020pci_id_table[] = { (kernel_ulong_t)&card_info_eae_ma1 }, { + 0x10B5, 0x9050, + 0x10B5, 0x3294, + 0, 0, + (kernel_ulong_t)&card_info_eae_fb2 + }, + { 0x14BA, 0x6000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 13d9ad4b3f5c..78043a9c5981 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared) return -ENODEV; } - dev->base_addr = ioaddr; - arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n", lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 8ca683396fcc..a12d603d41c6 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -464,7 +464,7 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val) /* Searches for a value in opt's values[] table which matches the flagmask */ static const struct bond_opt_value *bond_opt_get_flags(const struct bond_option *opt, - u32 flagmask) + u32 flagmask) { int i; @@ -744,14 +744,14 @@ static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) { if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) { - netdev_info(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n", - newval->string); + netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n", + newval->string); /* disable arp monitoring */ bond->params.arp_interval = 0; /* set miimon to default value */ bond->params.miimon = BOND_DEFAULT_MIIMON; - netdev_info(bond->dev, "Setting MII monitoring interval to %d\n", - bond->params.miimon); + netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n", + bond->params.miimon); } /* don't cache arp_validate between modes */ @@ -794,7 +794,7 @@ static int bond_option_active_slave_set(struct bonding *bond, block_netpoll_tx(); /* check to see if we are clearing active */ if (!slave_dev) { - netdev_info(bond->dev, "Clearing current active slave\n"); + netdev_dbg(bond->dev, "Clearing current active slave\n"); RCU_INIT_POINTER(bond->curr_active_slave, NULL); bond_select_active_slave(bond); } else { @@ -805,13 +805,13 @@ static int bond_option_active_slave_set(struct bonding *bond, if (new_active == old_active) { /* do nothing */ - netdev_info(bond->dev, "%s is already the current active slave\n", - new_active->dev->name); + netdev_dbg(bond->dev, "%s is already the current active slave\n", + new_active->dev->name); } else { if (old_active && (new_active->link == BOND_LINK_UP) && bond_slave_is_up(new_active)) { - netdev_info(bond->dev, "Setting %s as active slave\n", - new_active->dev->name); + netdev_dbg(bond->dev, "Setting %s as active slave\n", + new_active->dev->name); bond_change_active_slave(bond, new_active); } else { netdev_err(bond->dev, "Could not set %s as active slave; either %s is down or the link is down\n", @@ -833,17 +833,17 @@ static int bond_option_active_slave_set(struct bonding *bond, static int bond_option_miimon_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting MII monitoring interval to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting MII monitoring interval to %llu\n", + newval->value); bond->params.miimon = newval->value; if (bond->params.updelay) - netdev_info(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n", - bond->params.updelay * bond->params.miimon); + netdev_dbg(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n", + bond->params.updelay * bond->params.miimon); if (bond->params.downdelay) - netdev_info(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n", - bond->params.downdelay * bond->params.miimon); + netdev_dbg(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n", + bond->params.downdelay * bond->params.miimon); if (newval->value && bond->params.arp_interval) { - netdev_info(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n"); + netdev_dbg(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n"); bond->params.arp_interval = 0; if (bond->params.arp_validate) bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; @@ -885,8 +885,8 @@ static int bond_option_updelay_set(struct bonding *bond, bond->params.miimon); } bond->params.updelay = value / bond->params.miimon; - netdev_info(bond->dev, "Setting up delay to %d\n", - bond->params.updelay * bond->params.miimon); + netdev_dbg(bond->dev, "Setting up delay to %d\n", + bond->params.updelay * bond->params.miimon); return 0; } @@ -907,8 +907,8 @@ static int bond_option_downdelay_set(struct bonding *bond, bond->params.miimon); } bond->params.downdelay = value / bond->params.miimon; - netdev_info(bond->dev, "Setting down delay to %d\n", - bond->params.downdelay * bond->params.miimon); + netdev_dbg(bond->dev, "Setting down delay to %d\n", + bond->params.downdelay * bond->params.miimon); return 0; } @@ -916,8 +916,8 @@ static int bond_option_downdelay_set(struct bonding *bond, static int bond_option_use_carrier_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting use_carrier to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting use_carrier to %llu\n", + newval->value); bond->params.use_carrier = newval->value; return 0; @@ -930,16 +930,16 @@ static int bond_option_use_carrier_set(struct bonding *bond, static int bond_option_arp_interval_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ARP monitoring interval to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting ARP monitoring interval to %llu\n", + newval->value); bond->params.arp_interval = newval->value; if (newval->value) { if (bond->params.miimon) { - netdev_info(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n"); + netdev_dbg(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n"); bond->params.miimon = 0; } if (!bond->params.arp_targets[0]) - netdev_info(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n"); + netdev_dbg(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n"); } if (bond->dev->flags & IFF_UP) { /* If the interface is up, we may need to fire off @@ -1000,7 +1000,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) return -EINVAL; } - netdev_info(bond->dev, "Adding ARP target %pI4\n", &target); + netdev_dbg(bond->dev, "Adding ARP target %pI4\n", &target); _bond_options_arp_ip_target_set(bond, ind, target, jiffies); @@ -1036,7 +1036,7 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) if (ind == 0 && !targets[1] && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n"); - netdev_info(bond->dev, "Removing ARP target %pI4\n", &target); + netdev_dbg(bond->dev, "Removing ARP target %pI4\n", &target); bond_for_each_slave(bond, slave, iter) { targets_rx = slave->target_last_arp_rx; @@ -1088,8 +1088,8 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond, static int bond_option_arp_validate_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting arp_validate to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n", + newval->string, newval->value); if (bond->dev->flags & IFF_UP) { if (!newval->value) @@ -1105,8 +1105,8 @@ static int bond_option_arp_validate_set(struct bonding *bond, static int bond_option_arp_all_targets_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting arp_all_targets to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting arp_all_targets to %s (%llu)\n", + newval->string, newval->value); bond->params.arp_all_targets = newval->value; return 0; @@ -1126,7 +1126,7 @@ static int bond_option_primary_set(struct bonding *bond, *p = '\0'; /* check to see if we are clearing primary */ if (!strlen(primary)) { - netdev_info(bond->dev, "Setting primary slave to None\n"); + netdev_dbg(bond->dev, "Setting primary slave to None\n"); RCU_INIT_POINTER(bond->primary_slave, NULL); memset(bond->params.primary, 0, sizeof(bond->params.primary)); bond_select_active_slave(bond); @@ -1135,8 +1135,8 @@ static int bond_option_primary_set(struct bonding *bond, bond_for_each_slave(bond, slave, iter) { if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) { - netdev_info(bond->dev, "Setting %s as primary slave\n", - slave->dev->name); + netdev_dbg(bond->dev, "Setting %s as primary slave\n", + slave->dev->name); rcu_assign_pointer(bond->primary_slave, slave); strcpy(bond->params.primary, slave->dev->name); bond_select_active_slave(bond); @@ -1145,15 +1145,15 @@ static int bond_option_primary_set(struct bonding *bond, } if (rtnl_dereference(bond->primary_slave)) { - netdev_info(bond->dev, "Setting primary slave to None\n"); + netdev_dbg(bond->dev, "Setting primary slave to None\n"); RCU_INIT_POINTER(bond->primary_slave, NULL); bond_select_active_slave(bond); } strncpy(bond->params.primary, primary, IFNAMSIZ); bond->params.primary[IFNAMSIZ - 1] = 0; - netdev_info(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n", - primary, bond->dev->name); + netdev_dbg(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n", + primary, bond->dev->name); out: unblock_netpoll_tx(); @@ -1164,8 +1164,8 @@ out: static int bond_option_primary_reselect_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting primary_reselect to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting primary_reselect to %s (%llu)\n", + newval->string, newval->value); bond->params.primary_reselect = newval->value; block_netpoll_tx(); @@ -1178,8 +1178,8 @@ static int bond_option_primary_reselect_set(struct bonding *bond, static int bond_option_fail_over_mac_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting fail_over_mac to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting fail_over_mac to %s (%llu)\n", + newval->string, newval->value); bond->params.fail_over_mac = newval->value; return 0; @@ -1188,8 +1188,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond, static int bond_option_xmit_hash_policy_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting xmit hash policy to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n", + newval->string, newval->value); bond->params.xmit_policy = newval->value; return 0; @@ -1198,8 +1198,8 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond, static int bond_option_resend_igmp_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting resend_igmp to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting resend_igmp to %llu\n", + newval->value); bond->params.resend_igmp = newval->value; return 0; @@ -1237,8 +1237,8 @@ static int bond_option_all_slaves_active_set(struct bonding *bond, static int bond_option_min_links_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting min links value to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting min links value to %llu\n", + newval->value); bond->params.min_links = newval->value; bond_set_carrier(bond); @@ -1256,6 +1256,8 @@ static int bond_option_lp_interval_set(struct bonding *bond, static int bond_option_pps_set(struct bonding *bond, const struct bond_opt_value *newval) { + netdev_dbg(bond->dev, "Setting packets per slave to %llu\n", + newval->value); bond->params.packets_per_slave = newval->value; if (newval->value > 0) { bond->params.reciprocal_packets_per_slave = @@ -1274,8 +1276,8 @@ static int bond_option_pps_set(struct bonding *bond, static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting LACP rate to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting LACP rate to %s (%llu)\n", + newval->string, newval->value); bond->params.lacp_fast = newval->value; bond_3ad_update_lacp_rate(bond); @@ -1285,8 +1287,8 @@ static int bond_option_lacp_rate_set(struct bonding *bond, static int bond_option_ad_select_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_select to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting ad_select to %s (%llu)\n", + newval->string, newval->value); bond->params.ad_select = newval->value; return 0; @@ -1347,7 +1349,7 @@ out: return ret; err_no_cmd: - netdev_info(bond->dev, "invalid input for queue_id set\n"); + netdev_dbg(bond->dev, "invalid input for queue_id set\n"); ret = -EPERM; goto out; @@ -1369,20 +1371,20 @@ static int bond_option_slaves_set(struct bonding *bond, dev = __dev_get_by_name(dev_net(bond->dev), ifname); if (!dev) { - netdev_info(bond->dev, "interface %s does not exist!\n", - ifname); + netdev_dbg(bond->dev, "interface %s does not exist!\n", + ifname); ret = -ENODEV; goto out; } switch (command[0]) { case '+': - netdev_info(bond->dev, "Adding slave %s\n", dev->name); + netdev_dbg(bond->dev, "Adding slave %s\n", dev->name); ret = bond_enslave(bond->dev, dev); break; case '-': - netdev_info(bond->dev, "Removing slave %s\n", dev->name); + netdev_dbg(bond->dev, "Removing slave %s\n", dev->name); ret = bond_release(bond->dev, dev); break; @@ -1402,8 +1404,8 @@ err_no_cmd: static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting dynamic-lb to %s (%llu)\n", - newval->string, newval->value); + netdev_dbg(bond->dev, "Setting dynamic-lb to %s (%llu)\n", + newval->string, newval->value); bond->params.tlb_dynamic_lb = newval->value; return 0; @@ -1412,8 +1414,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting ad_actor_sys_prio to %llu\n", + newval->value); bond->params.ad_actor_sys_prio = newval->value; bond_3ad_update_ad_actor_settings(bond); @@ -1442,7 +1444,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, if (!is_valid_ether_addr(mac)) goto err; - netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac); + netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac); ether_addr_copy(bond->params.ad_actor_system, mac); bond_3ad_update_ad_actor_settings(bond); @@ -1456,8 +1458,8 @@ err: static int bond_option_ad_user_port_key_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n", - newval->value); + netdev_dbg(bond->dev, "Setting ad_user_port_key to %llu\n", + newval->value); bond->params.ad_user_port_key = newval->value; return 0; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 127adbeefb10..9795419aac2d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -123,38 +123,13 @@ #define DMA_ISR 0x3008 #define DMA_AXIARCR 0x3010 #define DMA_AXIAWCR 0x3018 +#define DMA_AXIAWARCR 0x301c #define DMA_DSR0 0x3020 #define DMA_DSR1 0x3024 +#define DMA_TXEDMACR 0x3040 +#define DMA_RXEDMACR 0x3044 /* DMA register entry bit positions and sizes */ -#define DMA_AXIARCR_DRC_INDEX 0 -#define DMA_AXIARCR_DRC_WIDTH 4 -#define DMA_AXIARCR_DRD_INDEX 4 -#define DMA_AXIARCR_DRD_WIDTH 2 -#define DMA_AXIARCR_TEC_INDEX 8 -#define DMA_AXIARCR_TEC_WIDTH 4 -#define DMA_AXIARCR_TED_INDEX 12 -#define DMA_AXIARCR_TED_WIDTH 2 -#define DMA_AXIARCR_THC_INDEX 16 -#define DMA_AXIARCR_THC_WIDTH 4 -#define DMA_AXIARCR_THD_INDEX 20 -#define DMA_AXIARCR_THD_WIDTH 2 -#define DMA_AXIAWCR_DWC_INDEX 0 -#define DMA_AXIAWCR_DWC_WIDTH 4 -#define DMA_AXIAWCR_DWD_INDEX 4 -#define DMA_AXIAWCR_DWD_WIDTH 2 -#define DMA_AXIAWCR_RPC_INDEX 8 -#define DMA_AXIAWCR_RPC_WIDTH 4 -#define DMA_AXIAWCR_RPD_INDEX 12 -#define DMA_AXIAWCR_RPD_WIDTH 2 -#define DMA_AXIAWCR_RHC_INDEX 16 -#define DMA_AXIAWCR_RHC_WIDTH 4 -#define DMA_AXIAWCR_RHD_INDEX 20 -#define DMA_AXIAWCR_RHD_WIDTH 2 -#define DMA_AXIAWCR_TDC_INDEX 24 -#define DMA_AXIAWCR_TDC_WIDTH 4 -#define DMA_AXIAWCR_TDD_INDEX 28 -#define DMA_AXIAWCR_TDD_WIDTH 2 #define DMA_ISR_MACIS_INDEX 17 #define DMA_ISR_MACIS_WIDTH 1 #define DMA_ISR_MTLIS_INDEX 16 @@ -163,14 +138,31 @@ #define DMA_MR_INTM_WIDTH 2 #define DMA_MR_SWR_INDEX 0 #define DMA_MR_SWR_WIDTH 1 +#define DMA_RXEDMACR_RDPS_INDEX 0 +#define DMA_RXEDMACR_RDPS_WIDTH 3 +#define DMA_SBMR_AAL_INDEX 12 +#define DMA_SBMR_AAL_WIDTH 1 #define DMA_SBMR_EAME_INDEX 11 #define DMA_SBMR_EAME_WIDTH 1 -#define DMA_SBMR_BLEN_256_INDEX 7 -#define DMA_SBMR_BLEN_256_WIDTH 1 +#define DMA_SBMR_BLEN_INDEX 1 +#define DMA_SBMR_BLEN_WIDTH 7 +#define DMA_SBMR_RD_OSR_LMT_INDEX 16 +#define DMA_SBMR_RD_OSR_LMT_WIDTH 6 #define DMA_SBMR_UNDEF_INDEX 0 #define DMA_SBMR_UNDEF_WIDTH 1 +#define DMA_SBMR_WR_OSR_LMT_INDEX 24 +#define DMA_SBMR_WR_OSR_LMT_WIDTH 6 +#define DMA_TXEDMACR_TDPS_INDEX 0 +#define DMA_TXEDMACR_TDPS_WIDTH 3 /* DMA register values */ +#define DMA_SBMR_BLEN_256 256 +#define DMA_SBMR_BLEN_128 128 +#define DMA_SBMR_BLEN_64 64 +#define DMA_SBMR_BLEN_32 32 +#define DMA_SBMR_BLEN_16 16 +#define DMA_SBMR_BLEN_8 8 +#define DMA_SBMR_BLEN_4 4 #define DMA_DSR_RPS_WIDTH 4 #define DMA_DSR_TPS_WIDTH 4 #define DMA_DSR_Q_WIDTH (DMA_DSR_RPS_WIDTH + DMA_DSR_TPS_WIDTH) @@ -959,6 +951,7 @@ #define XP_DRIVER_INT_RO 0x0064 #define XP_DRIVER_SCRATCH_0 0x0068 #define XP_DRIVER_SCRATCH_1 0x006c +#define XP_INT_REISSUE_EN 0x0074 #define XP_INT_EN 0x0078 #define XP_I2C_MUTEX 0x0080 #define XP_MDIO_MUTEX 0x0084 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 0a98c369df20..45d92304068e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -176,8 +176,8 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_free_ring_resources\n"); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; xgbe_free_ring(pdata, channel->tx_ring); xgbe_free_ring(pdata, channel->rx_ring); } @@ -185,34 +185,60 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata) DBGPR("<--xgbe_free_ring_resources\n"); } +static void *xgbe_alloc_node(size_t size, int node) +{ + void *mem; + + mem = kzalloc_node(size, GFP_KERNEL, node); + if (!mem) + mem = kzalloc(size, GFP_KERNEL); + + return mem; +} + +static void *xgbe_dma_alloc_node(struct device *dev, size_t size, + dma_addr_t *dma, int node) +{ + void *mem; + int cur_node = dev_to_node(dev); + + set_dev_node(dev, node); + mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); + set_dev_node(dev, cur_node); + + if (!mem) + mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); + + return mem; +} + static int xgbe_init_ring(struct xgbe_prv_data *pdata, struct xgbe_ring *ring, unsigned int rdesc_count) { - DBGPR("-->xgbe_init_ring\n"); + size_t size; if (!ring) return 0; /* Descriptors */ + size = rdesc_count * sizeof(struct xgbe_ring_desc); + ring->rdesc_count = rdesc_count; - ring->rdesc = dma_alloc_coherent(pdata->dev, - (sizeof(struct xgbe_ring_desc) * - rdesc_count), &ring->rdesc_dma, - GFP_KERNEL); + ring->rdesc = xgbe_dma_alloc_node(pdata->dev, size, &ring->rdesc_dma, + ring->node); if (!ring->rdesc) return -ENOMEM; /* Descriptor information */ - ring->rdata = kcalloc(rdesc_count, sizeof(struct xgbe_ring_data), - GFP_KERNEL); + size = rdesc_count * sizeof(struct xgbe_ring_data); + + ring->rdata = xgbe_alloc_node(size, ring->node); if (!ring->rdata) return -ENOMEM; netif_dbg(pdata, drv, pdata->netdev, - "rdesc=%p, rdesc_dma=%pad, rdata=%p\n", - ring->rdesc, &ring->rdesc_dma, ring->rdata); - - DBGPR("<--xgbe_init_ring\n"); + "rdesc=%p, rdesc_dma=%pad, rdata=%p, node=%d\n", + ring->rdesc, &ring->rdesc_dma, ring->rdata, ring->node); return 0; } @@ -223,10 +249,8 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata) unsigned int i; int ret; - DBGPR("-->xgbe_alloc_ring_resources\n"); - - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; netif_dbg(pdata, drv, pdata->netdev, "%s - Tx ring:\n", channel->name); @@ -250,8 +274,6 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata) } } - DBGPR("<--xgbe_alloc_ring_resources\n"); - return 0; err_ring: @@ -261,21 +283,33 @@ err_ring: } static int xgbe_alloc_pages(struct xgbe_prv_data *pdata, - struct xgbe_page_alloc *pa, gfp_t gfp, int order) + struct xgbe_page_alloc *pa, int alloc_order, + int node) { struct page *pages = NULL; dma_addr_t pages_dma; - int ret; + gfp_t gfp; + int order, ret; + +again: + order = alloc_order; /* Try to obtain pages, decreasing order if necessary */ - gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; + gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN; while (order >= 0) { - pages = alloc_pages(gfp, order); + pages = alloc_pages_node(node, gfp, order); if (pages) break; order--; } + + /* If we couldn't get local pages, try getting from anywhere */ + if (!pages && (node != NUMA_NO_NODE)) { + node = NUMA_NO_NODE; + goto again; + } + if (!pages) return -ENOMEM; @@ -327,14 +361,14 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata, int ret; if (!ring->rx_hdr_pa.pages) { - ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0); + ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, 0, ring->node); if (ret) return ret; } if (!ring->rx_buf_pa.pages) { - ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC, - PAGE_ALLOC_COSTLY_ORDER); + ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, + PAGE_ALLOC_COSTLY_ORDER, ring->node); if (ret) return ret; } @@ -362,8 +396,8 @@ static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_wrapper_tx_descriptor_init\n"); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; ring = channel->tx_ring; if (!ring) break; @@ -403,8 +437,8 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_wrapper_rx_descriptor_init\n"); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; ring = channel->rx_ring; if (!ring) break; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 24a687ce4388..06f953e1e9b2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -174,58 +174,30 @@ static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata, return ret; } -static int xgbe_config_pblx8(struct xgbe_prv_data *pdata) +static int xgbe_config_pbl_val(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; + unsigned int pblx8, pbl; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, PBLX8, - pdata->pblx8); - - return 0; -} - -static int xgbe_get_tx_pbl_val(struct xgbe_prv_data *pdata) -{ - return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_TCR, PBL); -} - -static int xgbe_config_tx_pbl_val(struct xgbe_prv_data *pdata) -{ - struct xgbe_channel *channel; - unsigned int i; - - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) - break; + pblx8 = DMA_PBL_X8_DISABLE; + pbl = pdata->pbl; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, PBL, - pdata->tx_pbl); + if (pdata->pbl > 32) { + pblx8 = DMA_PBL_X8_ENABLE; + pbl >>= 3; } - return 0; -} - -static int xgbe_get_rx_pbl_val(struct xgbe_prv_data *pdata) -{ - return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_RCR, PBL); -} - -static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata) -{ - struct xgbe_channel *channel; - unsigned int i; + for (i = 0; i < pdata->channel_count; i++) { + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, PBLX8, + pblx8); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) - break; + if (pdata->channel[i]->tx_ring) + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, + PBL, pbl); - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, PBL, - pdata->rx_pbl); + if (pdata->channel[i]->rx_ring) + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, + PBL, pbl); } return 0; @@ -233,15 +205,13 @@ static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata) static int xgbe_config_osp_mode(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, OSP, + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, OSP, pdata->tx_osp_mode); } @@ -292,15 +262,13 @@ static int xgbe_config_tx_threshold(struct xgbe_prv_data *pdata, static int xgbe_config_rx_coalesce(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RIWT, RWT, + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RIWT, RWT, pdata->rx_riwt); } @@ -314,44 +282,38 @@ static int xgbe_config_tx_coalesce(struct xgbe_prv_data *pdata) static void xgbe_config_rx_buffer_size(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, RBSZ, + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, RBSZ, pdata->rx_buf_size); } } static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, TSE, 1); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, 1); } } static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, SPH, 1); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1); } XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); @@ -651,8 +613,9 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM, pdata->channel_irq_mode); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + /* Clear all the interrupts which are set */ dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR); XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr); @@ -1497,26 +1460,37 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel) static void xgbe_update_tstamp_addend(struct xgbe_prv_data *pdata, unsigned int addend) { + unsigned int count = 10000; + /* Set the addend register value and tell the device */ XGMAC_IOWRITE(pdata, MAC_TSAR, addend); XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSADDREG, 1); /* Wait for addend update to complete */ - while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG)) + while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG)) udelay(5); + + if (!count) + netdev_err(pdata->netdev, + "timed out updating timestamp addend register\n"); } static void xgbe_set_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec, unsigned int nsec) { + unsigned int count = 10000; + /* Set the time values and tell the device */ XGMAC_IOWRITE(pdata, MAC_STSUR, sec); XGMAC_IOWRITE(pdata, MAC_STNUR, nsec); XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSINIT, 1); /* Wait for time update to complete */ - while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT)) + while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT)) udelay(5); + + if (!count) + netdev_err(pdata->netdev, "timed out initializing timestamp\n"); } static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata) @@ -2140,37 +2114,38 @@ static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata) static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata) { + unsigned int sbmr; + + sbmr = XGMAC_IOREAD(pdata, DMA_SBMR); + /* Set enhanced addressing mode */ - XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, EAME, 1); + XGMAC_SET_BITS(sbmr, DMA_SBMR, EAME, 1); /* Set the System Bus mode */ - XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, UNDEF, 1); - XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, BLEN_256, 1); + XGMAC_SET_BITS(sbmr, DMA_SBMR, UNDEF, 1); + XGMAC_SET_BITS(sbmr, DMA_SBMR, BLEN, pdata->blen >> 2); + XGMAC_SET_BITS(sbmr, DMA_SBMR, AAL, pdata->aal); + XGMAC_SET_BITS(sbmr, DMA_SBMR, RD_OSR_LMT, pdata->rd_osr_limit - 1); + XGMAC_SET_BITS(sbmr, DMA_SBMR, WR_OSR_LMT, pdata->wr_osr_limit - 1); + + XGMAC_IOWRITE(pdata, DMA_SBMR, sbmr); + + /* Set descriptor fetching threshold */ + if (pdata->vdata->tx_desc_prefetch) + XGMAC_IOWRITE_BITS(pdata, DMA_TXEDMACR, TDPS, + pdata->vdata->tx_desc_prefetch); + + if (pdata->vdata->rx_desc_prefetch) + XGMAC_IOWRITE_BITS(pdata, DMA_RXEDMACR, RDPS, + pdata->vdata->rx_desc_prefetch); } static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata) { - unsigned int arcache, awcache; - - arcache = 0; - XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, pdata->arcache); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, pdata->axdomain); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, pdata->arcache); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, pdata->axdomain); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, pdata->arcache); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, pdata->axdomain); - XGMAC_IOWRITE(pdata, DMA_AXIARCR, arcache); - - awcache = 0; - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, pdata->awcache); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, pdata->axdomain); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, pdata->awcache); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, pdata->axdomain); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, pdata->awcache); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, pdata->axdomain); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, pdata->awcache); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, pdata->axdomain); - XGMAC_IOWRITE(pdata, DMA_AXIAWCR, awcache); + XGMAC_IOWRITE(pdata, DMA_AXIARCR, pdata->arcr); + XGMAC_IOWRITE(pdata, DMA_AXIAWCR, pdata->awcr); + if (pdata->awarcr) + XGMAC_IOWRITE(pdata, DMA_AXIAWARCR, pdata->awarcr); } static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata) @@ -3202,16 +3177,14 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata, static void xgbe_enable_tx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Enable each Tx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1); } /* Enable each Tx queue */ @@ -3225,7 +3198,6 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata) static void xgbe_disable_tx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Prepare for Tx DMA channel stop */ @@ -3240,12 +3212,11 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0); /* Disable each Tx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0); } } @@ -3277,16 +3248,14 @@ static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata, static void xgbe_enable_rx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int reg_val, i; /* Enable each Rx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1); } /* Enable each Rx queue */ @@ -3304,7 +3273,6 @@ static void xgbe_enable_rx(struct xgbe_prv_data *pdata) static void xgbe_disable_rx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Disable MAC Rx */ @@ -3321,27 +3289,24 @@ static void xgbe_disable_rx(struct xgbe_prv_data *pdata) XGMAC_IOWRITE(pdata, MAC_RQC0R, 0); /* Disable each Rx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0); } } static void xgbe_powerup_tx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Enable each Tx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1); } /* Enable MAC Tx */ @@ -3350,7 +3315,6 @@ static void xgbe_powerup_tx(struct xgbe_prv_data *pdata) static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Prepare for Tx DMA channel stop */ @@ -3361,42 +3325,37 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); /* Disable each Tx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0); } } static void xgbe_powerup_rx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Enable each Rx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1); } } static void xgbe_powerdown_rx(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; /* Disable each Rx DMA channel */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->rx_ring) + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) break; - XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0); + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0); } } @@ -3420,9 +3379,7 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_dma_bus(pdata); xgbe_config_dma_cache(pdata); xgbe_config_osp_mode(pdata); - xgbe_config_pblx8(pdata); - xgbe_config_tx_pbl_val(pdata); - xgbe_config_rx_pbl_val(pdata); + xgbe_config_pbl_val(pdata); xgbe_config_rx_coalesce(pdata); xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); @@ -3550,13 +3507,6 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) /* For TX DMA Operating on Second Frame config */ hw_if->config_osp_mode = xgbe_config_osp_mode; - /* For RX and TX PBL config */ - hw_if->config_rx_pbl_val = xgbe_config_rx_pbl_val; - hw_if->get_rx_pbl_val = xgbe_get_rx_pbl_val; - hw_if->config_tx_pbl_val = xgbe_config_tx_pbl_val; - hw_if->get_tx_pbl_val = xgbe_get_tx_pbl_val; - hw_if->config_pblx8 = xgbe_config_pblx8; - /* For MMC statistics support */ hw_if->tx_mmc_int = xgbe_tx_mmc_int; hw_if->rx_mmc_int = xgbe_rx_mmc_int; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index a934bd5d0507..ecef3ee87b17 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -158,81 +158,106 @@ static int xgbe_one_poll(struct napi_struct *, int); static int xgbe_all_poll(struct napi_struct *, int); static void xgbe_stop(struct xgbe_prv_data *); +static void *xgbe_alloc_node(size_t size, int node) +{ + void *mem; + + mem = kzalloc_node(size, GFP_KERNEL, node); + if (!mem) + mem = kzalloc(size, GFP_KERNEL); + + return mem; +} + +static void xgbe_free_channels(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(pdata->channel); i++) { + if (!pdata->channel[i]) + continue; + + kfree(pdata->channel[i]->rx_ring); + kfree(pdata->channel[i]->tx_ring); + kfree(pdata->channel[i]); + + pdata->channel[i] = NULL; + } + + pdata->channel_count = 0; +} + static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel_mem, *channel; - struct xgbe_ring *tx_ring, *rx_ring; + struct xgbe_channel *channel; + struct xgbe_ring *ring; unsigned int count, i; - int ret = -ENOMEM; + unsigned int cpu; + int node; count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count); + for (i = 0; i < count; i++) { + /* Attempt to use a CPU on the node the device is on */ + cpu = cpumask_local_spread(i, dev_to_node(pdata->dev)); - channel_mem = kcalloc(count, sizeof(struct xgbe_channel), GFP_KERNEL); - if (!channel_mem) - goto err_channel; - - tx_ring = kcalloc(pdata->tx_ring_count, sizeof(struct xgbe_ring), - GFP_KERNEL); - if (!tx_ring) - goto err_tx_ring; + /* Set the allocation node based on the returned CPU */ + node = cpu_to_node(cpu); - rx_ring = kcalloc(pdata->rx_ring_count, sizeof(struct xgbe_ring), - GFP_KERNEL); - if (!rx_ring) - goto err_rx_ring; + channel = xgbe_alloc_node(sizeof(*channel), node); + if (!channel) + goto err_mem; + pdata->channel[i] = channel; - for (i = 0, channel = channel_mem; i < count; i++, channel++) { snprintf(channel->name, sizeof(channel->name), "channel-%u", i); channel->pdata = pdata; channel->queue_index = i; channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE + (DMA_CH_INC * i); + channel->node = node; + cpumask_set_cpu(cpu, &channel->affinity_mask); if (pdata->per_channel_irq) channel->dma_irq = pdata->channel_irq[i]; if (i < pdata->tx_ring_count) { - spin_lock_init(&tx_ring->lock); - channel->tx_ring = tx_ring++; + ring = xgbe_alloc_node(sizeof(*ring), node); + if (!ring) + goto err_mem; + + spin_lock_init(&ring->lock); + ring->node = node; + + channel->tx_ring = ring; } if (i < pdata->rx_ring_count) { - spin_lock_init(&rx_ring->lock); - channel->rx_ring = rx_ring++; + ring = xgbe_alloc_node(sizeof(*ring), node); + if (!ring) + goto err_mem; + + spin_lock_init(&ring->lock); + ring->node = node; + + channel->rx_ring = ring; } netif_dbg(pdata, drv, pdata->netdev, + "%s: cpu=%u, node=%d\n", channel->name, cpu, node); + + netif_dbg(pdata, drv, pdata->netdev, "%s: dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n", channel->name, channel->dma_regs, channel->dma_irq, channel->tx_ring, channel->rx_ring); } - pdata->channel = channel_mem; pdata->channel_count = count; return 0; -err_rx_ring: - kfree(tx_ring); - -err_tx_ring: - kfree(channel_mem); - -err_channel: - return ret; -} - -static void xgbe_free_channels(struct xgbe_prv_data *pdata) -{ - if (!pdata->channel) - return; - - kfree(pdata->channel->rx_ring); - kfree(pdata->channel->tx_ring); - kfree(pdata->channel); +err_mem: + xgbe_free_channels(pdata); - pdata->channel = NULL; - pdata->channel_count = 0; + return -ENOMEM; } static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring) @@ -301,12 +326,10 @@ static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata, static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - xgbe_enable_rx_tx_int(pdata, channel); + for (i = 0; i < pdata->channel_count; i++) + xgbe_enable_rx_tx_int(pdata, pdata->channel[i]); } static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata, @@ -329,12 +352,10 @@ static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata, static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; unsigned int i; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - xgbe_disable_rx_tx_int(pdata, channel); + for (i = 0; i < pdata->channel_count; i++) + xgbe_disable_rx_tx_int(pdata, pdata->channel[i]); } static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period, @@ -382,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period, return false; } -static irqreturn_t xgbe_ecc_isr(int irq, void *data) +static void xgbe_ecc_isr_task(unsigned long data) { - struct xgbe_prv_data *pdata = data; + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; unsigned int ecc_isr; bool stop = false; @@ -435,12 +456,26 @@ out: /* Clear all ECC interrupts */ XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr); - return IRQ_HANDLED; + /* Reissue interrupt if status is not clear */ + if (pdata->vdata->irq_reissue_support) + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 1); } -static irqreturn_t xgbe_isr(int irq, void *data) +static irqreturn_t xgbe_ecc_isr(int irq, void *data) { struct xgbe_prv_data *pdata = data; + + if (pdata->isr_as_tasklet) + tasklet_schedule(&pdata->tasklet_ecc); + else + xgbe_ecc_isr_task((unsigned long)pdata); + + return IRQ_HANDLED; +} + +static void xgbe_isr_task(unsigned long data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; @@ -461,7 +496,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (!(dma_isr & (1 << i))) continue; - channel = pdata->channel + i; + channel = pdata->channel[i]; dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR); netif_dbg(pdata, intr, pdata->netdev, "DMA_CH%u_ISR=%#010x\n", @@ -543,15 +578,36 @@ static irqreturn_t xgbe_isr(int irq, void *data) isr_done: /* If there is not a separate AN irq, handle it here */ if (pdata->dev_irq == pdata->an_irq) - pdata->phy_if.an_isr(irq, pdata); + pdata->phy_if.an_isr(pdata); /* If there is not a separate ECC irq, handle it here */ if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq)) - xgbe_ecc_isr(irq, pdata); + xgbe_ecc_isr_task((unsigned long)pdata); /* If there is not a separate I2C irq, handle it here */ if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq)) - pdata->i2c_if.i2c_isr(irq, pdata); + pdata->i2c_if.i2c_isr(pdata); + + /* Reissue interrupt if status is not clear */ + if (pdata->vdata->irq_reissue_support) { + unsigned int reissue_mask; + + reissue_mask = 1 << 0; + if (!pdata->per_channel_irq) + reissue_mask |= 0xffff < 4; + + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, reissue_mask); + } +} + +static irqreturn_t xgbe_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = data; + + if (pdata->isr_as_tasklet) + tasklet_schedule(&pdata->tasklet_dev); + else + xgbe_isr_task((unsigned long)pdata); return IRQ_HANDLED; } @@ -640,8 +696,8 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata) setup_timer(&pdata->service_timer, xgbe_service_timer, (unsigned long)pdata); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; if (!channel->tx_ring) break; @@ -662,8 +718,8 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) del_timer_sync(&pdata->service_timer); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; if (!channel->tx_ring) break; @@ -781,8 +837,8 @@ static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add) unsigned int i; if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; if (add) netif_napi_add(pdata->netdev, &channel->napi, xgbe_one_poll, NAPI_POLL_WEIGHT); @@ -804,8 +860,8 @@ static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del) unsigned int i; if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; napi_disable(&channel->napi); if (del) @@ -826,6 +882,10 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) unsigned int i; int ret; + tasklet_init(&pdata->tasklet_dev, xgbe_isr_task, (unsigned long)pdata); + tasklet_init(&pdata->tasklet_ecc, xgbe_ecc_isr_task, + (unsigned long)pdata); + ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, netdev->name, pdata); if (ret) { @@ -847,8 +907,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) if (!pdata->per_channel_irq) return 0; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; snprintf(channel->dma_irq_name, sizeof(channel->dma_irq_name) - 1, "%s-TxRx-%u", netdev_name(netdev), @@ -862,14 +922,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) channel->dma_irq); goto err_dma_irq; } + + irq_set_affinity_hint(channel->dma_irq, + &channel->affinity_mask); } return 0; err_dma_irq: /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ - for (i--, channel--; i < pdata->channel_count; i--, channel--) + for (i--; i < pdata->channel_count; i--) { + channel = pdata->channel[i]; + + irq_set_affinity_hint(channel->dma_irq, NULL); devm_free_irq(pdata->dev, channel->dma_irq, channel); + } if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); @@ -893,9 +960,12 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) if (!pdata->per_channel_irq) return; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + + irq_set_affinity_hint(channel->dma_irq, NULL); devm_free_irq(pdata->dev, channel->dma_irq, channel); + } } void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata) @@ -930,16 +1000,14 @@ void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata) static void xgbe_free_tx_data(struct xgbe_prv_data *pdata) { struct xgbe_desc_if *desc_if = &pdata->desc_if; - struct xgbe_channel *channel; struct xgbe_ring *ring; struct xgbe_ring_data *rdata; unsigned int i, j; DBGPR("-->xgbe_free_tx_data\n"); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - ring = channel->tx_ring; + for (i = 0; i < pdata->channel_count; i++) { + ring = pdata->channel[i]->tx_ring; if (!ring) break; @@ -955,16 +1023,14 @@ static void xgbe_free_tx_data(struct xgbe_prv_data *pdata) static void xgbe_free_rx_data(struct xgbe_prv_data *pdata) { struct xgbe_desc_if *desc_if = &pdata->desc_if; - struct xgbe_channel *channel; struct xgbe_ring *ring; struct xgbe_ring_data *rdata; unsigned int i, j; DBGPR("-->xgbe_free_rx_data\n"); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - ring = channel->rx_ring; + for (i = 0; i < pdata->channel_count; i++) { + ring = pdata->channel[i]->rx_ring; if (!ring) break; @@ -1140,8 +1206,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) hw_if->exit(pdata); - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; if (!channel->tx_ring) continue; @@ -1212,6 +1278,10 @@ static void xgbe_tx_tstamp(struct work_struct *work) u64 nsec; unsigned long flags; + spin_lock_irqsave(&pdata->tstamp_lock, flags); + if (!pdata->tx_tstamp_skb) + goto unlock; + if (pdata->tx_tstamp) { nsec = timecounter_cyc2time(&pdata->tstamp_tc, pdata->tx_tstamp); @@ -1223,8 +1293,9 @@ static void xgbe_tx_tstamp(struct work_struct *work) dev_kfree_skb_any(pdata->tx_tstamp_skb); - spin_lock_irqsave(&pdata->tstamp_lock, flags); pdata->tx_tstamp_skb = NULL; + +unlock: spin_unlock_irqrestore(&pdata->tstamp_lock, flags); } @@ -1623,7 +1694,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len); - channel = pdata->channel + skb->queue_mapping; + channel = pdata->channel[skb->queue_mapping]; txq = netdev_get_tx_queue(netdev, channel->queue_index); ring = channel->tx_ring; packet = &ring->packet_data; @@ -1833,9 +1904,10 @@ static void xgbe_poll_controller(struct net_device *netdev) DBGPR("-->xgbe_poll_controller\n"); if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; xgbe_dma_isr(channel->dma_irq, channel); + } } else { disable_irq(pdata->dev_irq); xgbe_isr(pdata->dev_irq, pdata); @@ -2328,8 +2400,9 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget) do { last_processed = processed; - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + /* Cleanup Tx ring first */ xgbe_tx_poll(channel); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 920566a3a599..67a2e52ad25d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -247,7 +247,7 @@ static int xgbe_set_pauseparam(struct net_device *netdev, if (pause->autoneg && (pdata->phy.autoneg != AUTONEG_ENABLE)) { netdev_err(netdev, - "autoneg disabled, pause autoneg not avialable\n"); + "autoneg disabled, pause autoneg not available\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index 417bdb5982a9..4d9062d35930 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -274,13 +274,16 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata, XI2C_IOREAD(pdata, IC_CLR_STOP_DET); } -static irqreturn_t xgbe_i2c_isr(int irq, void *data) +static void xgbe_i2c_isr_task(unsigned long data) { struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; unsigned int isr; isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT); + if (!isr) + goto reissue_check; + netif_dbg(pdata, intr, pdata->netdev, "I2C interrupt received: status=%#010x\n", isr); @@ -308,6 +311,21 @@ out: if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET)) complete(&pdata->i2c_complete); +reissue_check: + /* Reissue interrupt if status is not clear */ + if (pdata->vdata->irq_reissue_support) + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 2); +} + +static irqreturn_t xgbe_i2c_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + + if (pdata->isr_as_tasklet) + tasklet_schedule(&pdata->tasklet_i2c); + else + xgbe_i2c_isr_task((unsigned long)pdata); + return IRQ_HANDLED; } @@ -349,12 +367,11 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr) XI2C_IOWRITE(pdata, IC_TAR, addr); } -static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata) +static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata) { - if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT)) - return IRQ_HANDLED; + xgbe_i2c_isr_task((unsigned long)pdata); - return xgbe_i2c_isr(irq, pdata); + return IRQ_HANDLED; } static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op) @@ -445,6 +462,9 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata) /* If we have a separate I2C irq, enable it */ if (pdata->dev_irq != pdata->i2c_irq) { + tasklet_init(&pdata->tasklet_i2c, xgbe_i2c_isr_task, + (unsigned long)pdata); + ret = devm_request_irq(pdata->dev, pdata->i2c_irq, xgbe_i2c_isr, 0, pdata->i2c_name, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 17ac8f9a51a0..500147d9e3c8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -140,14 +140,16 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata) { DBGPR("-->xgbe_default_config\n"); - pdata->pblx8 = DMA_PBL_X8_ENABLE; + pdata->blen = DMA_SBMR_BLEN_64; + pdata->pbl = DMA_PBL_128; + pdata->aal = 1; + pdata->rd_osr_limit = 8; + pdata->wr_osr_limit = 8; pdata->tx_sf_mode = MTL_TSF_ENABLE; pdata->tx_threshold = MTL_TX_THRESHOLD_64; - pdata->tx_pbl = DMA_PBL_16; pdata->tx_osp_mode = DMA_OSP_ENABLE; pdata->rx_sf_mode = MTL_RSF_DISABLE; pdata->rx_threshold = MTL_RX_THRESHOLD_64; - pdata->rx_pbl = DMA_PBL_16; pdata->pause_autoneg = 1; pdata->tx_pause = 1; pdata->rx_pause = 1; @@ -277,7 +279,11 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata) pdata->desc_ded_period = jiffies; /* Issue software reset to device */ - pdata->hw_if.exit(pdata); + ret = pdata->hw_if.exit(pdata); + if (ret) { + dev_err(dev, "software reset failed\n"); + return ret; + } /* Set default configuration data */ xgbe_default_config(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index b672d9249539..80684914dd8a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -665,6 +665,10 @@ static void xgbe_an37_isr(struct xgbe_prv_data *pdata) } else { /* Enable AN interrupts */ xgbe_an37_enable_interrupts(pdata); + + /* Reissue interrupt if status is not clear */ + if (pdata->vdata->irq_reissue_support) + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3); } } @@ -684,10 +688,14 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata) } else { /* Enable AN interrupts */ xgbe_an73_enable_interrupts(pdata); + + /* Reissue interrupt if status is not clear */ + if (pdata->vdata->irq_reissue_support) + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3); } } -static irqreturn_t xgbe_an_isr(int irq, void *data) +static void xgbe_an_isr_task(unsigned long data) { struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; @@ -705,13 +713,25 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) default: break; } +} + +static irqreturn_t xgbe_an_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + + if (pdata->isr_as_tasklet) + tasklet_schedule(&pdata->tasklet_an); + else + xgbe_an_isr_task((unsigned long)pdata); return IRQ_HANDLED; } -static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata) +static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata) { - return xgbe_an_isr(irq, pdata); + xgbe_an_isr_task((unsigned long)pdata); + + return IRQ_HANDLED; } static void xgbe_an_irq_work(struct work_struct *work) @@ -915,6 +935,10 @@ static void xgbe_an_state_machine(struct work_struct *work) break; } + /* Reissue interrupt if status is not clear */ + if (pdata->vdata->irq_reissue_support) + XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3); + mutex_unlock(&pdata->an_mutex); } @@ -1379,6 +1403,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) /* If we have a separate AN irq, enable it */ if (pdata->dev_irq != pdata->an_irq) { + tasklet_init(&pdata->tasklet_an, xgbe_an_isr_task, + (unsigned long)pdata); + ret = devm_request_irq(pdata->dev, pdata->an_irq, xgbe_an_isr, 0, pdata->an_name, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 38392a520725..1e56ad7bd9a5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -139,6 +139,7 @@ static int xgbe_config_multi_msi(struct xgbe_prv_data *pdata) return ret; } + pdata->isr_as_tasklet = 1; pdata->irq_count = ret; pdata->dev_irq = pci_irq_vector(pdata->pcidev, 0); @@ -175,6 +176,7 @@ static int xgbe_config_irqs(struct xgbe_prv_data *pdata) return ret; } + pdata->isr_as_tasklet = pdata->pcidev->msi_enabled ? 1 : 0; pdata->irq_count = 1; pdata->channel_irq_count = 1; @@ -325,9 +327,9 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Set the DMA coherency values */ pdata->coherent = 1; - pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; - pdata->arcache = XGBE_DMA_OS_ARCACHE; - pdata->awcache = XGBE_DMA_OS_AWCACHE; + pdata->arcr = XGBE_DMA_PCI_ARCR; + pdata->awcr = XGBE_DMA_PCI_AWCR; + pdata->awarcr = XGBE_DMA_PCI_AWARCR; /* Set the maximum channels and queues */ reg = XP_IOREAD(pdata, XP_PROP_1); @@ -445,6 +447,9 @@ static const struct xgbe_version_data xgbe_v2a = { .tx_tstamp_workaround = 1, .ecc_support = 1, .i2c_support = 1, + .irq_reissue_support = 1, + .tx_desc_prefetch = 5, + .rx_desc_prefetch = 5, }; static const struct xgbe_version_data xgbe_v2b = { @@ -456,6 +461,9 @@ static const struct xgbe_version_data xgbe_v2b = { .tx_tstamp_workaround = 1, .ecc_support = 1, .i2c_support = 1, + .irq_reissue_support = 1, + .tx_desc_prefetch = 5, + .rx_desc_prefetch = 5, }; static const struct pci_device_id xgbe_pci_table[] = { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index e707c49cc55a..04b5c149caca 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -711,23 +711,39 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; + if (!phy_data->sfp_mod_absent && !phy_data->sfp_changed) + return; + + pdata->phy.supported &= ~SUPPORTED_Autoneg; + pdata->phy.supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause); + pdata->phy.supported &= ~SUPPORTED_TP; + pdata->phy.supported &= ~SUPPORTED_FIBRE; + pdata->phy.supported &= ~SUPPORTED_100baseT_Full; + pdata->phy.supported &= ~SUPPORTED_1000baseT_Full; + pdata->phy.supported &= ~SUPPORTED_10000baseT_Full; + if (phy_data->sfp_mod_absent) { pdata->phy.speed = SPEED_UNKNOWN; pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.pause_autoneg = AUTONEG_ENABLE; + + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + pdata->phy.supported |= SUPPORTED_FIBRE; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) + pdata->phy.supported |= SUPPORTED_100baseT_Full; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + pdata->phy.advertising = pdata->phy.supported; return; } - pdata->phy.advertising &= ~ADVERTISED_Autoneg; - pdata->phy.advertising &= ~ADVERTISED_TP; - pdata->phy.advertising &= ~ADVERTISED_FIBRE; - pdata->phy.advertising &= ~ADVERTISED_100baseT_Full; - pdata->phy.advertising &= ~ADVERTISED_1000baseT_Full; - pdata->phy.advertising &= ~ADVERTISED_10000baseT_Full; - pdata->phy.advertising &= ~ADVERTISED_10000baseR_FEC; - switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: case XGBE_SFP_BASE_1000_SX: @@ -736,17 +752,25 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) pdata->phy.speed = SPEED_UNKNOWN; pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_ENABLE; - pdata->phy.advertising |= ADVERTISED_Autoneg; + pdata->phy.pause_autoneg = AUTONEG_ENABLE; + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; break; case XGBE_SFP_BASE_10000_SR: case XGBE_SFP_BASE_10000_LR: case XGBE_SFP_BASE_10000_LRM: case XGBE_SFP_BASE_10000_ER: case XGBE_SFP_BASE_10000_CR: - default: pdata->phy.speed = SPEED_10000; pdata->phy.duplex = DUPLEX_FULL; pdata->phy.autoneg = AUTONEG_DISABLE; + pdata->phy.pause_autoneg = AUTONEG_DISABLE; + break; + default: + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + pdata->phy.autoneg = AUTONEG_DISABLE; + pdata->phy.pause_autoneg = AUTONEG_DISABLE; break; } @@ -754,36 +778,38 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) case XGBE_SFP_BASE_1000_T: case XGBE_SFP_BASE_1000_CX: case XGBE_SFP_BASE_10000_CR: - pdata->phy.advertising |= ADVERTISED_TP; + pdata->phy.supported |= SUPPORTED_TP; break; default: - pdata->phy.advertising |= ADVERTISED_FIBRE; + pdata->phy.supported |= SUPPORTED_FIBRE; } switch (phy_data->sfp_speed) { case XGBE_SFP_SPEED_100_1000: if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) - pdata->phy.advertising |= ADVERTISED_100baseT_Full; + pdata->phy.supported |= SUPPORTED_100baseT_Full; if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) - pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + pdata->phy.supported |= SUPPORTED_1000baseT_Full; break; case XGBE_SFP_SPEED_1000: if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) - pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + pdata->phy.supported |= SUPPORTED_1000baseT_Full; break; case XGBE_SFP_SPEED_10000: if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) - pdata->phy.advertising |= ADVERTISED_10000baseT_Full; + pdata->phy.supported |= SUPPORTED_10000baseT_Full; break; default: /* Choose the fastest supported speed */ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) - pdata->phy.advertising |= ADVERTISED_10000baseT_Full; + pdata->phy.supported |= SUPPORTED_10000baseT_Full; else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) - pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + pdata->phy.supported |= SUPPORTED_1000baseT_Full; else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) - pdata->phy.advertising |= ADVERTISED_100baseT_Full; + pdata->phy.supported |= SUPPORTED_100baseT_Full; } + + pdata->phy.advertising = pdata->phy.supported; } static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom, @@ -1095,7 +1121,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) ret = xgbe_phy_sfp_get_mux(pdata); if (ret) { - netdev_err(pdata->netdev, "I2C error setting SFP MUX\n"); + dev_err_once(pdata->dev, "%s: I2C error setting SFP MUX\n", + netdev_name(pdata->netdev)); return ret; } @@ -1105,7 +1132,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) &eeprom_addr, sizeof(eeprom_addr), &sfp_eeprom, sizeof(sfp_eeprom)); if (ret) { - netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n"); + dev_err_once(pdata->dev, "%s: I2C error reading SFP EEPROM\n", + netdev_name(pdata->netdev)); goto put; } @@ -1164,7 +1192,8 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) &gpio_reg, sizeof(gpio_reg), gpio_ports, sizeof(gpio_ports)); if (ret) { - netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n"); + dev_err_once(pdata->dev, "%s: I2C error reading SFP GPIOs\n", + netdev_name(pdata->netdev)); return; } @@ -1694,19 +1723,25 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata) xgbe_phy_put_comm_ownership(pdata); } -static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata) +static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, + unsigned int cmd, unsigned int sub_cmd) { - if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) - return; + unsigned int s0 = 0; + unsigned int wait; /* Log if a previous command did not complete */ - netif_dbg(pdata, link, pdata->netdev, - "firmware mailbox not ready for command\n"); -} + if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + netif_dbg(pdata, link, pdata->netdev, + "firmware mailbox not ready for command\n"); -static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata) -{ - unsigned int wait; + /* Construct the command */ + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, sub_cmd); + + /* Issue the command */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); /* Wait for command to complete */ wait = XGBE_RATECHANGE_COUNT; @@ -1723,21 +1758,8 @@ static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata) static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) { - unsigned int s0; - - xgbe_phy_start_ratechange(pdata); - /* Receiver Reset Cycle */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + xgbe_phy_perform_ratechange(pdata, 5, 0); netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n"); } @@ -1746,14 +1768,8 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - xgbe_phy_start_ratechange(pdata); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + /* Power off */ + xgbe_phy_perform_ratechange(pdata, 0, 0); phy_data->cur_mode = XGBE_MODE_UNKNOWN; @@ -1763,33 +1779,21 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata) static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - /* 10G/SFI */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3); if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) { - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + xgbe_phy_perform_ratechange(pdata, 3, 0); } else { if (phy_data->sfp_cable_len <= 1) - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1); + xgbe_phy_perform_ratechange(pdata, 3, 1); else if (phy_data->sfp_cable_len <= 3) - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2); + xgbe_phy_perform_ratechange(pdata, 3, 2); else - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + xgbe_phy_perform_ratechange(pdata, 3, 3); } - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); - phy_data->cur_mode = XGBE_MODE_SFI; netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n"); @@ -1798,23 +1802,11 @@ static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - /* 1G/X */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + xgbe_phy_perform_ratechange(pdata, 1, 3); phy_data->cur_mode = XGBE_MODE_X; @@ -1824,23 +1816,11 @@ static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - /* 1G/SGMII */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + xgbe_phy_perform_ratechange(pdata, 1, 2); phy_data->cur_mode = XGBE_MODE_SGMII_1000; @@ -1850,23 +1830,11 @@ static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - - /* 1G/SGMII */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + /* 100M/SGMII */ + xgbe_phy_perform_ratechange(pdata, 1, 1); phy_data->cur_mode = XGBE_MODE_SGMII_100; @@ -1876,23 +1844,11 @@ static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - /* 10G/KR */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + xgbe_phy_perform_ratechange(pdata, 4, 0); phy_data->cur_mode = XGBE_MODE_KR; @@ -1902,23 +1858,11 @@ static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - /* 2.5G/KX */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + xgbe_phy_perform_ratechange(pdata, 2, 0); phy_data->cur_mode = XGBE_MODE_KX_2500; @@ -1928,23 +1872,11 @@ static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int s0; xgbe_phy_set_redrv_mode(pdata); - xgbe_phy_start_ratechange(pdata); - /* 1G/KX */ - s0 = 0; - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); - XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); - - /* Call FW to make the change */ - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); - XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); - XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); - - xgbe_phy_complete_ratechange(pdata); + xgbe_phy_perform_ratechange(pdata, 1, 3); phy_data->cur_mode = XGBE_MODE_KX_1000; @@ -2037,6 +1969,8 @@ static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data, return XGBE_MODE_SGMII_100; case SPEED_1000: return XGBE_MODE_SGMII_1000; + case SPEED_2500: + return XGBE_MODE_KX_2500; case SPEED_10000: return XGBE_MODE_KR; default: @@ -2180,6 +2114,9 @@ static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata, case XGBE_MODE_SGMII_1000: return xgbe_phy_check_mode(pdata, mode, ADVERTISED_1000baseT_Full); + case XGBE_MODE_KX_2500: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_2500baseX_Full); case XGBE_MODE_KR: return xgbe_phy_check_mode(pdata, mode, ADVERTISED_10000baseT_Full); @@ -2210,6 +2147,8 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata, return xgbe_phy_check_mode(pdata, mode, ADVERTISED_1000baseT_Full); case XGBE_MODE_SFI: + if (phy_data->sfp_mod_absent) + return true; return xgbe_phy_check_mode(pdata, mode, ADVERTISED_10000baseT_Full); default: @@ -2287,6 +2226,8 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data, case SPEED_100: case SPEED_1000: return true; + case SPEED_2500: + return (phy_data->port_mode == XGBE_PORT_MODE_NBASE_T); case SPEED_10000: return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T); default: @@ -3013,9 +2954,6 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { pdata->phy.supported |= SUPPORTED_10000baseT_Full; phy_data->start_mode = XGBE_MODE_SFI; - if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) - pdata->phy.supported |= - SUPPORTED_10000baseR_FEC; } phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 84d4c51cab8c..d0f3dfb88202 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -448,13 +448,11 @@ static int xgbe_platform_probe(struct platform_device *pdev) } pdata->coherent = (attr == DEV_DMA_COHERENT); if (pdata->coherent) { - pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; - pdata->arcache = XGBE_DMA_OS_ARCACHE; - pdata->awcache = XGBE_DMA_OS_AWCACHE; + pdata->arcr = XGBE_DMA_OS_ARCR; + pdata->awcr = XGBE_DMA_OS_AWCR; } else { - pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN; - pdata->arcache = XGBE_DMA_SYS_ARCACHE; - pdata->awcache = XGBE_DMA_SYS_AWCACHE; + pdata->arcr = XGBE_DMA_SYS_ARCR; + pdata->awcr = XGBE_DMA_SYS_AWCR; } /* Set the maximum fifo amounts */ diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c index a533a6cc2d53..d06d260cf1e2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c @@ -267,7 +267,7 @@ void xgbe_ptp_register(struct xgbe_prv_data *pdata) ktime_to_ns(ktime_get_real())); /* Disable all timestamping to start */ - XGMAC_IOWRITE(pdata, MAC_TCR, 0); + XGMAC_IOWRITE(pdata, MAC_TSCR, 0); pdata->tstamp_config.tx_type = HWTSTAMP_TX_OFF; pdata->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index f9a24639f574..0938294f640a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -128,6 +128,7 @@ #include <linux/net_tstamp.h> #include <net/dcbnl.h> #include <linux/completion.h> +#include <linux/cpumask.h> #define XGBE_DRV_NAME "amd-xgbe" #define XGBE_DRV_VERSION "1.0.3" @@ -163,14 +164,17 @@ #define XGBE_DMA_STOP_TIMEOUT 1 /* DMA cache settings - Outer sharable, write-back, write-allocate */ -#define XGBE_DMA_OS_AXDOMAIN 0x2 -#define XGBE_DMA_OS_ARCACHE 0xb -#define XGBE_DMA_OS_AWCACHE 0xf +#define XGBE_DMA_OS_ARCR 0x002b2b2b +#define XGBE_DMA_OS_AWCR 0x2f2f2f2f /* DMA cache settings - System, no caches used */ -#define XGBE_DMA_SYS_AXDOMAIN 0x3 -#define XGBE_DMA_SYS_ARCACHE 0x0 -#define XGBE_DMA_SYS_AWCACHE 0x0 +#define XGBE_DMA_SYS_ARCR 0x00303030 +#define XGBE_DMA_SYS_AWCR 0x30303030 + +/* DMA cache settings - PCI device */ +#define XGBE_DMA_PCI_ARCR 0x00000003 +#define XGBE_DMA_PCI_AWCR 0x13131313 +#define XGBE_DMA_PCI_AWARCR 0x00000313 /* DMA channel interrupt modes */ #define XGBE_IRQ_MODE_EDGE 0 @@ -412,6 +416,7 @@ struct xgbe_ring { /* Page allocation for RX buffers */ struct xgbe_page_alloc rx_hdr_pa; struct xgbe_page_alloc rx_buf_pa; + int node; /* Ring index values * cur - Tx: index of descriptor to be used for current transfer @@ -462,6 +467,9 @@ struct xgbe_channel { struct xgbe_ring *tx_ring; struct xgbe_ring *rx_ring; + + int node; + cpumask_t affinity_mask; } ____cacheline_aligned; enum xgbe_state { @@ -734,13 +742,6 @@ struct xgbe_hw_if { /* For TX DMA Operate on Second Frame config */ int (*config_osp_mode)(struct xgbe_prv_data *); - /* For RX and TX PBL config */ - int (*config_rx_pbl_val)(struct xgbe_prv_data *); - int (*get_rx_pbl_val)(struct xgbe_prv_data *); - int (*config_tx_pbl_val)(struct xgbe_prv_data *); - int (*get_tx_pbl_val)(struct xgbe_prv_data *); - int (*config_pblx8)(struct xgbe_prv_data *); - /* For MMC statistics */ void (*rx_mmc_int)(struct xgbe_prv_data *); void (*tx_mmc_int)(struct xgbe_prv_data *); @@ -837,7 +838,7 @@ struct xgbe_phy_if { bool (*phy_valid_speed)(struct xgbe_prv_data *, int); /* For single interrupt support */ - irqreturn_t (*an_isr)(int, struct xgbe_prv_data *); + irqreturn_t (*an_isr)(struct xgbe_prv_data *); /* PHY implementation specific services */ struct xgbe_phy_impl_if phy_impl; @@ -855,7 +856,7 @@ struct xgbe_i2c_if { int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *); /* For single interrupt support */ - irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *); + irqreturn_t (*i2c_isr)(struct xgbe_prv_data *); }; struct xgbe_desc_if { @@ -924,6 +925,9 @@ struct xgbe_version_data { unsigned int tx_tstamp_workaround; unsigned int ecc_support; unsigned int i2c_support; + unsigned int irq_reissue_support; + unsigned int tx_desc_prefetch; + unsigned int rx_desc_prefetch; }; struct xgbe_prv_data { @@ -1001,9 +1005,9 @@ struct xgbe_prv_data { /* AXI DMA settings */ unsigned int coherent; - unsigned int axdomain; - unsigned int arcache; - unsigned int awcache; + unsigned int arcr; + unsigned int awcr; + unsigned int awarcr; /* Service routine support */ struct workqueue_struct *dev_workqueue; @@ -1011,7 +1015,7 @@ struct xgbe_prv_data { struct timer_list service_timer; /* Rings for Tx/Rx on a DMA channel */ - struct xgbe_channel *channel; + struct xgbe_channel *channel[XGBE_MAX_DMA_CHANNELS]; unsigned int tx_max_channel_count; unsigned int rx_max_channel_count; unsigned int channel_count; @@ -1026,19 +1030,21 @@ struct xgbe_prv_data { unsigned int rx_q_count; /* Tx/Rx common settings */ - unsigned int pblx8; + unsigned int blen; + unsigned int pbl; + unsigned int aal; + unsigned int rd_osr_limit; + unsigned int wr_osr_limit; /* Tx settings */ unsigned int tx_sf_mode; unsigned int tx_threshold; - unsigned int tx_pbl; unsigned int tx_osp_mode; unsigned int tx_max_fifo_size; /* Rx settings */ unsigned int rx_sf_mode; unsigned int rx_threshold; - unsigned int rx_pbl; unsigned int rx_max_fifo_size; /* Tx coalescing settings */ @@ -1159,6 +1165,12 @@ struct xgbe_prv_data { unsigned int lpm_ctrl; /* CTRL1 for resume */ + unsigned int isr_as_tasklet; + struct tasklet_struct tasklet_dev; + struct tasklet_struct tasklet_ecc; + struct tasklet_struct tasklet_i2c; + struct tasklet_struct tasklet_an; + #ifdef CONFIG_DEBUG_FS struct dentry *xgbe_debugfs; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 7e913d8331c3..8c9986f3fc01 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2252,7 +2252,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, if (atl1c_tx_map(adapter, skb, tpd, type) < 0) { netif_info(adapter, tx_done, adapter->netdev, - "tx-skb droppted due to dma error\n"); + "tx-skb dropped due to dma error\n"); /* roll back tpd/buffer */ atl1c_tx_rollback(adapter, tpd, type); dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 14c236e5bdb1..c12b4d3e946e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) } else { /* If no mc addresses are required, flush the configuration */ rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); - if (rc) + if (rc < 0) BNX2X_ERR("Failed to clear multicast configuration %d\n", rc); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 11e8a866a312..a19f68f5862d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1311,10 +1311,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, cp_cons = NEXT_CMP(cp_cons); } - if (unlikely(agg_bufs > MAX_SKB_FRAGS)) { + if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) { bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs); - netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n", - agg_bufs, (int)MAX_SKB_FRAGS); + if (agg_bufs > MAX_SKB_FRAGS) + netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n", + agg_bufs, (int)MAX_SKB_FRAGS); return NULL; } @@ -1573,6 +1574,45 @@ next_rx_no_prod: return rc; } +/* In netpoll mode, if we are using a combined completion ring, we need to + * discard the rx packets and recycle the buffers. + */ +static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi, + u32 *raw_cons, u8 *event) +{ + struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + u32 tmp_raw_cons = *raw_cons; + struct rx_cmp_ext *rxcmp1; + struct rx_cmp *rxcmp; + u16 cp_cons; + u8 cmp_type; + + cp_cons = RING_CMP(tmp_raw_cons); + rxcmp = (struct rx_cmp *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + + tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons); + cp_cons = RING_CMP(tmp_raw_cons); + rxcmp1 = (struct rx_cmp_ext *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + + if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) + return -EBUSY; + + cmp_type = RX_CMP_TYPE(rxcmp); + if (cmp_type == CMP_TYPE_RX_L2_CMP) { + rxcmp1->rx_cmp_cfa_code_errors_v2 |= + cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR); + } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { + struct rx_tpa_end_cmp_ext *tpa_end1; + + tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1; + tpa_end1->rx_tpa_end_cmp_errors_v2 |= + cpu_to_le32(RX_TPA_END_CMP_ERRORS); + } + return bnxt_rx_pkt(bp, bnapi, raw_cons, event); +} + #define BNXT_GET_EVENT_PORT(data) \ ((data) & \ ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK) @@ -1755,7 +1795,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (unlikely(tx_pkts > bp->tx_wake_thresh)) rx_pkts = budget; } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { - rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); + if (likely(budget)) + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); + else + rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons, + &event); if (likely(rc >= 0)) rx_pkts += rc; else if (rc == -EBUSY) /* partial completion */ @@ -6730,12 +6774,11 @@ static void bnxt_poll_controller(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); int i; - for (i = 0; i < bp->cp_nr_rings; i++) { - struct bnxt_irq *irq = &bp->irq_tbl[i]; + /* Only process tx rings/combined rings in netpoll mode. */ + for (i = 0; i < bp->tx_nr_rings; i++) { + struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - disable_irq(irq->vector); - irq->handler(irq->vector, bp->bnapi[i]); - enable_irq(irq->vector); + napi_schedule(&txr->bnapi->napi); } } #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5984423499e6..f872a7db2ca8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext { __le32 rx_tpa_end_cmp_errors_v2; #define RX_TPA_END_CMP_V2 (0x1 << 0) - #define RX_TPA_END_CMP_ERRORS (0x7fff << 1) + #define RX_TPA_END_CMP_ERRORS (0x3 << 1) #define RX_TPA_END_CMPL_ERRORS_SHIFT 1 u32 rx_tpa_end_cmp_start_opaque; }; +#define TPA_END_ERRORS(rx_tpa_end_ext) \ + ((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 & \ + cpu_to_le32(RX_TPA_END_CMP_ERRORS)) + #define DB_IDX_MASK 0xffffff #define DB_IDX_VALID (0x1 << 26) #define DB_IRQ_DIS (0x1 << 27) diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 608bea171956..427d65a1a126 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -29,7 +29,15 @@ config MACB support for the MACB/GEM chip. To compile this driver as a module, choose M here: the module - will be called macb. + will be macb. + +config MACB_USE_HWSTAMP + bool "Use IEEE 1588 hwstamp" + depends on MACB + default y + imply PTP_1588_CLOCK + ---help--- + Enable IEEE 1588 Precision Time Protocol (PTP) support for MACB. config MACB_PCI tristate "Cadence PCI MACB/GEM support" diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile index 4ba75594d5c5..1d66ddb68969 100644 --- a/drivers/net/ethernet/cadence/Makefile +++ b/drivers/net/ethernet/cadence/Makefile @@ -1,6 +1,11 @@ # # Makefile for the Atmel network device drivers. # +macb-y := macb_main.o + +ifeq ($(CONFIG_MACB_USE_HWSTAMP),y) +macb-y += macb_ptp.o +endif obj-$(CONFIG_MACB) += macb.o obj-$(CONFIG_MACB_PCI) += macb_pci.o diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 2510661102ba..c93f3a2dc6c1 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -11,6 +11,12 @@ #define _MACB_H #include <linux/phy.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/net_tstamp.h> + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP) +#define MACB_EXT_DESC +#endif #define MACB_GREGS_NBR 16 #define MACB_GREGS_VERSION 2 @@ -86,6 +92,10 @@ #define GEM_SA3T 0x009C /* Specific3 Top */ #define GEM_SA4B 0x00A0 /* Specific4 Bottom */ #define GEM_SA4T 0x00A4 /* Specific4 Top */ +#define GEM_EFTSH 0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */ +#define GEM_EFRSH 0x00ec /* PTP Event Frame Received Seconds Register 47:32 */ +#define GEM_PEFTSH 0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */ +#define GEM_PEFRSH 0x00f4 /* PTP Peer Event Frame Received Seconds Register 47:32 */ #define GEM_OTX 0x0100 /* Octets transmitted */ #define GEM_OCTTXL 0x0100 /* Octets transmitted [31:0] */ #define GEM_OCTTXH 0x0104 /* Octets transmitted [47:32] */ @@ -155,6 +165,9 @@ #define GEM_DCFG6 0x0294 /* Design Config 6 */ #define GEM_DCFG7 0x0298 /* Design Config 7 */ +#define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */ +#define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */ + #define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2)) #define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2)) #define GEM_TBQPH(hw_q) (0x04C8) @@ -191,6 +204,8 @@ #define MACB_TZQ_OFFSET 12 /* Transmit zero quantum pause frame */ #define MACB_TZQ_SIZE 1 #define MACB_SRTSM_OFFSET 15 +#define MACB_OSSMODE_OFFSET 24 /* Enable One Step Synchro Mode */ +#define MACB_OSSMODE_SIZE 1 /* Bitfields in NCFGR */ #define MACB_SPD_OFFSET 0 /* Speed */ @@ -269,6 +284,10 @@ #define GEM_RXBS_SIZE 8 #define GEM_DDRP_OFFSET 24 /* disc_when_no_ahb */ #define GEM_DDRP_SIZE 1 +#define GEM_RXEXT_OFFSET 28 /* RX extended Buffer Descriptor mode */ +#define GEM_RXEXT_SIZE 1 +#define GEM_TXEXT_OFFSET 29 /* TX extended Buffer Descriptor mode */ +#define GEM_TXEXT_SIZE 1 #define GEM_ADDR64_OFFSET 30 /* Address bus width - 64b or 32b */ #define GEM_ADDR64_SIZE 1 @@ -425,6 +444,11 @@ #define GEM_TX_PKT_BUFF_OFFSET 21 #define GEM_TX_PKT_BUFF_SIZE 1 + +/* Bitfields in DCFG5. */ +#define GEM_TSU_OFFSET 8 +#define GEM_TSU_SIZE 1 + /* Bitfields in DCFG6. */ #define GEM_PBUF_LSO_OFFSET 27 #define GEM_PBUF_LSO_SIZE 1 @@ -439,6 +463,52 @@ #define GEM_NSINCR_OFFSET 0 #define GEM_NSINCR_SIZE 8 +/* Bitfields in TSH */ +#define GEM_TSH_OFFSET 0 /* TSU timer value (s). MSB [47:32] of seconds timer count */ +#define GEM_TSH_SIZE 16 + +/* Bitfields in TSL */ +#define GEM_TSL_OFFSET 0 /* TSU timer value (s). LSB [31:0] of seconds timer count */ +#define GEM_TSL_SIZE 32 + +/* Bitfields in TN */ +#define GEM_TN_OFFSET 0 /* TSU timer value (ns) */ +#define GEM_TN_SIZE 30 + +/* Bitfields in TXBDCTRL */ +#define GEM_TXTSMODE_OFFSET 4 /* TX Descriptor Timestamp Insertion mode */ +#define GEM_TXTSMODE_SIZE 2 + +/* Bitfields in RXBDCTRL */ +#define GEM_RXTSMODE_OFFSET 4 /* RX Descriptor Timestamp Insertion mode */ +#define GEM_RXTSMODE_SIZE 2 + +/* Transmit DMA buffer descriptor Word 1 */ +#define GEM_DMA_TXVALID_OFFSET 23 /* timestamp has been captured in the Buffer Descriptor */ +#define GEM_DMA_TXVALID_SIZE 1 + +/* Receive DMA buffer descriptor Word 0 */ +#define GEM_DMA_RXVALID_OFFSET 2 /* indicates a valid timestamp in the Buffer Descriptor */ +#define GEM_DMA_RXVALID_SIZE 1 + +/* DMA buffer descriptor Word 2 (32 bit addressing) or Word 4 (64 bit addressing) */ +#define GEM_DMA_SECL_OFFSET 30 /* Timestamp seconds[1:0] */ +#define GEM_DMA_SECL_SIZE 2 +#define GEM_DMA_NSEC_OFFSET 0 /* Timestamp nanosecs [29:0] */ +#define GEM_DMA_NSEC_SIZE 30 + +/* DMA buffer descriptor Word 3 (32 bit addressing) or Word 5 (64 bit addressing) */ + +/* New hardware supports 12 bit precision of timestamp in DMA buffer descriptor. + * Old hardware supports only 6 bit precision but it is enough for PTP. + * Less accuracy is used always instead of checking hardware version. + */ +#define GEM_DMA_SECH_OFFSET 0 /* Timestamp seconds[5:2] */ +#define GEM_DMA_SECH_SIZE 4 +#define GEM_DMA_SEC_WIDTH (GEM_DMA_SECH_SIZE + GEM_DMA_SECL_SIZE) +#define GEM_DMA_SEC_TOP (1 << GEM_DMA_SEC_WIDTH) +#define GEM_DMA_SEC_MASK (GEM_DMA_SEC_TOP - 1) + /* Bitfields in ADJ */ #define GEM_ADDSUB_OFFSET 31 #define GEM_ADDSUB_SIZE 1 @@ -514,6 +584,8 @@ #define queue_readl(queue, reg) (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg) #define queue_writel(queue, reg, value) (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value)) +#define PTP_TS_BUFFER_SIZE 128 /* must be power of 2 */ + /* Conditional GEM/MACB macros. These perform the operation to the correct * register dependent on whether the device is a GEM or a MACB. For registers * and bitfields that are common across both devices, use macb_{read,write}l @@ -546,16 +618,26 @@ struct macb_dma_desc { u32 ctrl; }; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT -enum macb_hw_dma_cap { - HW_DMA_CAP_32B, - HW_DMA_CAP_64B, -}; +#ifdef MACB_EXT_DESC +#define HW_DMA_CAP_32B 0 +#define HW_DMA_CAP_64B (1 << 0) +#define HW_DMA_CAP_PTP (1 << 1) +#define HW_DMA_CAP_64B_PTP (HW_DMA_CAP_64B | HW_DMA_CAP_PTP) struct macb_dma_desc_64 { u32 addrh; u32 resvd; }; + +struct macb_dma_desc_ptp { + u32 ts_1; + u32 ts_2; +}; + +struct gem_tx_ts { + struct sk_buff *skb; + struct macb_dma_desc_ptp desc_ptp; +}; #endif /* DMA descriptor bitfields */ @@ -871,6 +953,11 @@ struct macb_config { int jumbo_max_len; }; +struct tsu_incr { + u32 sub_ns; + u32 ns; +}; + struct macb_queue { struct macb *bp; int irq; @@ -887,6 +974,12 @@ struct macb_queue { struct macb_tx_skb *tx_skb; dma_addr_t tx_ring_dma; struct work_struct tx_error_task; + +#ifdef CONFIG_MACB_USE_HWSTAMP + struct work_struct tx_ts_task; + unsigned int tx_ts_head, tx_ts_tail; + struct gem_tx_ts tx_timestamps[PTP_TS_BUFFER_SIZE]; +#endif }; struct macb { @@ -955,11 +1048,62 @@ struct macb { u32 wol; struct macb_ptp_info *ptp_info; /* macb-ptp interface */ -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - enum macb_hw_dma_cap hw_dma_cap; +#ifdef MACB_EXT_DESC + uint8_t hw_dma_cap; #endif + spinlock_t tsu_clk_lock; /* gem tsu clock locking */ + unsigned int tsu_rate; + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; + struct tsu_incr tsu_incr; + struct hwtstamp_config tstamp_config; }; +#ifdef CONFIG_MACB_USE_HWSTAMP +#define GEM_TSEC_SIZE (GEM_TSH_SIZE + GEM_TSL_SIZE) +#define TSU_SEC_MAX_VAL (((u64)1 << GEM_TSEC_SIZE) - 1) +#define TSU_NSEC_MAX_VAL ((1 << GEM_TN_SIZE) - 1) + +enum macb_bd_control { + TSTAMP_DISABLED, + TSTAMP_FRAME_PTP_EVENT_ONLY, + TSTAMP_ALL_PTP_FRAMES, + TSTAMP_ALL_FRAMES, +}; + +void gem_ptp_init(struct net_device *ndev); +void gem_ptp_remove(struct net_device *ndev); +int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *des); +void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc); +static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc) +{ + if (queue->bp->tstamp_config.tx_type == TSTAMP_DISABLED) + return -ENOTSUPP; + + return gem_ptp_txstamp(queue, skb, desc); +} + +static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) +{ + if (bp->tstamp_config.rx_filter == TSTAMP_DISABLED) + return; + + gem_ptp_rxstamp(bp, skb, desc); +} +int gem_get_hwtst(struct net_device *dev, struct ifreq *rq); +int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd); +#else +static inline void gem_ptp_init(struct net_device *ndev) { } +static inline void gem_ptp_remove(struct net_device *ndev) { } + +static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc) +{ + return -1; +} + +static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { } +#endif + static inline bool macb_is_gem(struct macb *bp) { return !!(bp->caps & MACB_CAPS_MACB_IS_GEM); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb_main.c index 3ae9d8071ded..41e5711544fc 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -79,33 +79,84 @@ #define MACB_HALT_TIMEOUT 1230 /* DMA buffer descriptor might be different size - * depends on hardware configuration. + * depends on hardware configuration: + * + * 1. dma address width 32 bits: + * word 1: 32 bit address of Data Buffer + * word 2: control + * + * 2. dma address width 64 bits: + * word 1: 32 bit address of Data Buffer + * word 2: control + * word 3: upper 32 bit address of Data Buffer + * word 4: unused + * + * 3. dma address width 32 bits with hardware timestamping: + * word 1: 32 bit address of Data Buffer + * word 2: control + * word 3: timestamp word 1 + * word 4: timestamp word 2 + * + * 4. dma address width 64 bits with hardware timestamping: + * word 1: 32 bit address of Data Buffer + * word 2: control + * word 3: upper 32 bit address of Data Buffer + * word 4: unused + * word 5: timestamp word 1 + * word 6: timestamp word 2 */ static unsigned int macb_dma_desc_get_size(struct macb *bp) { -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) - return sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64); +#ifdef MACB_EXT_DESC + unsigned int desc_size; + + switch (bp->hw_dma_cap) { + case HW_DMA_CAP_64B: + desc_size = sizeof(struct macb_dma_desc) + + sizeof(struct macb_dma_desc_64); + break; + case HW_DMA_CAP_PTP: + desc_size = sizeof(struct macb_dma_desc) + + sizeof(struct macb_dma_desc_ptp); + break; + case HW_DMA_CAP_64B_PTP: + desc_size = sizeof(struct macb_dma_desc) + + sizeof(struct macb_dma_desc_64) + + sizeof(struct macb_dma_desc_ptp); + break; + default: + desc_size = sizeof(struct macb_dma_desc); + } + return desc_size; #endif return sizeof(struct macb_dma_desc); } -static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int idx) +static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx) { -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - /* Dma buffer descriptor is 4 words length (instead of 2 words) - * for 64b GEM. - */ - if (bp->hw_dma_cap == HW_DMA_CAP_64B) - idx <<= 1; +#ifdef MACB_EXT_DESC + switch (bp->hw_dma_cap) { + case HW_DMA_CAP_64B: + case HW_DMA_CAP_PTP: + desc_idx <<= 1; + break; + case HW_DMA_CAP_64B_PTP: + desc_idx *= 3; + break; + default: + break; + } + return desc_idx; #endif - return idx; + return desc_idx; } #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc) { - return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc)); + if (bp->hw_dma_cap & HW_DMA_CAP_64B) + return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc)); + return NULL; } #endif @@ -621,7 +672,7 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT struct macb_dma_desc_64 *desc_64; - if (bp->hw_dma_cap == HW_DMA_CAP_64B) { + if (bp->hw_dma_cap & HW_DMA_CAP_64B) { desc_64 = macb_64b_desc(bp, desc); desc_64->addrh = upper_32_bits(addr); } @@ -635,7 +686,7 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT struct macb_dma_desc_64 *desc_64; - if (bp->hw_dma_cap == HW_DMA_CAP_64B) { + if (bp->hw_dma_cap & HW_DMA_CAP_64B) { desc_64 = macb_64b_desc(bp, desc); addr = ((u64)(desc_64->addrh) << 32); } @@ -734,7 +785,7 @@ static void macb_tx_error_task(struct work_struct *work) /* Reinitialize the TX desc queue */ queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma)); #endif /* Make TX ring reflect state of hardware */ @@ -796,6 +847,12 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { + if (gem_ptp_do_txstamp(queue, skb, desc) == 0) { + /* skb now belongs to timestamp buffer + * and will be removed later + */ + tx_skb->skb = NULL; + } netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n", macb_tx_ring_wrap(bp, tail), skb->data); @@ -962,6 +1019,8 @@ static int gem_rx(struct macb *bp, int budget) bp->dev->stats.rx_packets++; bp->dev->stats.rx_bytes += skb->len; + gem_ptp_do_rxstamp(bp, skb, desc); + #if defined(DEBUG) && defined(VERBOSE_DEBUG) netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", skb->len, skb->csum); @@ -1283,7 +1342,6 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(HRESP)); } - status = queue_readl(queue, ISR); } @@ -1613,7 +1671,6 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Make newly initialized descriptor visible to hardware */ wmb(); - skb_tx_timestamp(skb); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); @@ -1942,9 +1999,13 @@ static void macb_configure_dma(struct macb *bp) dmacfg &= ~GEM_BIT(TXCOEN); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) dmacfg |= GEM_BIT(ADDR64); #endif +#ifdef CONFIG_MACB_USE_HWSTAMP + if (bp->hw_dma_cap & HW_DMA_CAP_PTP) + dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT); +#endif netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", dmacfg); gem_writel(bp, DMACFG, dmacfg); @@ -1992,13 +2053,13 @@ static void macb_init_hw(struct macb *bp) /* Initialize TX and RX buffers */ macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma)); #endif for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma)); #endif @@ -2467,6 +2528,70 @@ static int macb_set_ringparam(struct net_device *netdev, return 0; } +#ifdef CONFIG_MACB_USE_HWSTAMP +static unsigned int gem_get_tsu_rate(struct macb *bp) +{ + struct clk *tsu_clk; + unsigned int tsu_rate; + + tsu_clk = devm_clk_get(&bp->pdev->dev, "tsu_clk"); + if (!IS_ERR(tsu_clk)) + tsu_rate = clk_get_rate(tsu_clk); + /* try pclk instead */ + else if (!IS_ERR(bp->pclk)) { + tsu_clk = bp->pclk; + tsu_rate = clk_get_rate(tsu_clk); + } else + return -ENOTSUPP; + return tsu_rate; +} + +static s32 gem_get_ptp_max_adj(void) +{ + return 64000000; +} + +static int gem_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct macb *bp = netdev_priv(dev); + + if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) { + ethtool_op_get_ts_info(dev, info); + return 0; + } + + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = + (1 << HWTSTAMP_TX_ONESTEP_SYNC) | + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); + + info->phc_index = bp->ptp_clock ? ptp_clock_index(bp->ptp_clock) : -1; + + return 0; +} + +static struct macb_ptp_info gem_ptp_info = { + .ptp_init = gem_ptp_init, + .ptp_remove = gem_ptp_remove, + .get_ptp_max_adj = gem_get_ptp_max_adj, + .get_tsu_rate = gem_get_tsu_rate, + .get_ts_info = gem_get_ts_info, + .get_hwtst = gem_get_hwtst, + .set_hwtst = gem_set_hwtst, +}; +#endif + static int macb_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) { @@ -2600,6 +2725,16 @@ static void macb_configure_caps(struct macb *bp, dcfg = gem_readl(bp, DCFG2); if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0) bp->caps |= MACB_CAPS_FIFO_MODE; +#ifdef CONFIG_MACB_USE_HWSTAMP + if (gem_has_ptp(bp)) { + if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5))) + pr_err("GEM doesn't support hardware ptp.\n"); + else { + bp->hw_dma_cap |= HW_DMA_CAP_PTP; + bp->ptp_info = &gem_ptp_info; + } + } +#endif } dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps); @@ -2737,7 +2872,7 @@ static int macb_init(struct platform_device *pdev) queue->IMR = GEM_IMR(hw_q - 1); queue->TBQP = GEM_TBQP(hw_q - 1); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) queue->TBQPH = GEM_TBQPH(hw_q - 1); #endif } else { @@ -2748,7 +2883,7 @@ static int macb_init(struct platform_device *pdev) queue->IMR = MACB_IMR; queue->TBQP = MACB_TBQP; #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap == HW_DMA_CAP_64B) + if (bp->hw_dma_cap & HW_DMA_CAP_64B) queue->TBQPH = MACB_TBQPH; #endif } @@ -3205,7 +3340,9 @@ static const struct macb_config np4_config = { }; static const struct macb_config zynqmp_config = { - .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO, + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | + MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, @@ -3239,7 +3376,9 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids); #endif /* CONFIG_OF */ static const struct macb_config default_gem_config = { - .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO, + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | + MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, @@ -3328,19 +3467,17 @@ static int macb_probe(struct platform_device *pdev) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); - bp->hw_dma_cap = HW_DMA_CAP_64B; - } else - bp->hw_dma_cap = HW_DMA_CAP_32B; -#endif - spin_lock_init(&bp->lock); /* setup capabilities */ macb_configure_caps(bp, macb_config); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { + dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + bp->hw_dma_cap |= HW_DMA_CAP_64B; + } +#endif platform_set_drvdata(pdev, dev); dev->irq = platform_get_irq(pdev, 0); diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c new file mode 100755 index 000000000000..67cca08472b7 --- /dev/null +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -0,0 +1,518 @@ +/** + * 1588 PTP support for Cadence GEM device. + * + * Copyright (C) 2017 Cadence Design Systems - http://www.cadence.com + * + * Authors: Rafal Ozieblo <[email protected]> + * Bartosz Folta <[email protected]> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/etherdevice.h> +#include <linux/platform_device.h> +#include <linux/time64.h> +#include <linux/ptp_classify.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> +#include <linux/circ_buf.h> +#include <linux/spinlock.h> + +#include "macb.h" + +#define GEM_PTP_TIMER_NAME "gem-ptp-timer" + +static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp, + struct macb_dma_desc *desc) +{ + if (bp->hw_dma_cap == HW_DMA_CAP_PTP) + return (struct macb_dma_desc_ptp *) + ((u8 *)desc + sizeof(struct macb_dma_desc)); + if (bp->hw_dma_cap == HW_DMA_CAP_64B_PTP) + return (struct macb_dma_desc_ptp *) + ((u8 *)desc + sizeof(struct macb_dma_desc) + + sizeof(struct macb_dma_desc_64)); + return NULL; +} + +static int gem_tsu_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + struct macb *bp = container_of(ptp, struct macb, ptp_clock_info); + unsigned long flags; + long first, second; + u32 secl, sech; + + spin_lock_irqsave(&bp->tsu_clk_lock, flags); + first = gem_readl(bp, TN); + secl = gem_readl(bp, TSL); + sech = gem_readl(bp, TSH); + second = gem_readl(bp, TN); + + /* test for nsec rollover */ + if (first > second) { + /* if so, use later read & re-read seconds + * (assume all done within 1s) + */ + ts->tv_nsec = gem_readl(bp, TN); + secl = gem_readl(bp, TSL); + sech = gem_readl(bp, TSH); + } else { + ts->tv_nsec = first; + } + + spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); + ts->tv_sec = (((u64)sech << GEM_TSL_SIZE) | secl) + & TSU_SEC_MAX_VAL; + return 0; +} + +static int gem_tsu_set_time(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct macb *bp = container_of(ptp, struct macb, ptp_clock_info); + unsigned long flags; + u32 ns, sech, secl; + + secl = (u32)ts->tv_sec; + sech = (ts->tv_sec >> GEM_TSL_SIZE) & ((1 << GEM_TSH_SIZE) - 1); + ns = ts->tv_nsec; + + spin_lock_irqsave(&bp->tsu_clk_lock, flags); + + /* TSH doesn't latch the time and no atomicity! */ + gem_writel(bp, TN, 0); /* clear to avoid overflow */ + gem_writel(bp, TSH, sech); + /* write lower bits 2nd, for synchronized secs update */ + gem_writel(bp, TSL, secl); + gem_writel(bp, TN, ns); + + spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); + + return 0; +} + +static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec) +{ + unsigned long flags; + + /* tsu_timer_incr register must be written after + * the tsu_timer_incr_sub_ns register and the write operation + * will cause the value written to the tsu_timer_incr_sub_ns register + * to take effect. + */ + spin_lock_irqsave(&bp->tsu_clk_lock, flags); + gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns)); + gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns)); + spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); + + return 0; +} + +static int gem_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct macb *bp = container_of(ptp, struct macb, ptp_clock_info); + struct tsu_incr incr_spec; + bool neg_adj = false; + u32 word; + u64 adj; + + if (scaled_ppm < 0) { + neg_adj = true; + scaled_ppm = -scaled_ppm; + } + + /* Adjustment is relative to base frequency */ + incr_spec.sub_ns = bp->tsu_incr.sub_ns; + incr_spec.ns = bp->tsu_incr.ns; + + /* scaling: unused(8bit) | ns(8bit) | fractions(16bit) */ + word = ((u64)incr_spec.ns << GEM_SUBNSINCR_SIZE) + incr_spec.sub_ns; + adj = (u64)scaled_ppm * word; + /* Divide with rounding, equivalent to floating dividing: + * (temp / USEC_PER_SEC) + 0.5 + */ + adj += (USEC_PER_SEC >> 1); + adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */ + adj = div_u64(adj, USEC_PER_SEC); + adj = neg_adj ? (word - adj) : (word + adj); + + incr_spec.ns = (adj >> GEM_SUBNSINCR_SIZE) + & ((1 << GEM_NSINCR_SIZE) - 1); + incr_spec.sub_ns = adj & ((1 << GEM_SUBNSINCR_SIZE) - 1); + gem_tsu_incr_set(bp, &incr_spec); + return 0; +} + +static int gem_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct macb *bp = container_of(ptp, struct macb, ptp_clock_info); + struct timespec64 now, then = ns_to_timespec64(delta); + u32 adj, sign = 0; + + if (delta < 0) { + sign = 1; + delta = -delta; + } + + if (delta > TSU_NSEC_MAX_VAL) { + gem_tsu_get_time(&bp->ptp_clock_info, &now); + if (sign) + now = timespec64_sub(now, then); + else + now = timespec64_add(now, then); + + gem_tsu_set_time(&bp->ptp_clock_info, + (const struct timespec64 *)&now); + } else { + adj = (sign << GEM_ADDSUB_OFFSET) | delta; + + gem_writel(bp, TA, adj); + } + + return 0; +} + +static int gem_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +static struct ptp_clock_info gem_ptp_caps_template = { + .owner = THIS_MODULE, + .name = GEM_PTP_TIMER_NAME, + .max_adj = 0, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 1, + .adjfine = gem_ptp_adjfine, + .adjtime = gem_ptp_adjtime, + .gettime64 = gem_tsu_get_time, + .settime64 = gem_tsu_set_time, + .enable = gem_ptp_enable, +}; + +static void gem_ptp_init_timer(struct macb *bp) +{ + u32 rem = 0; + u64 adj; + + bp->tsu_incr.ns = div_u64_rem(NSEC_PER_SEC, bp->tsu_rate, &rem); + if (rem) { + adj = rem; + adj <<= GEM_SUBNSINCR_SIZE; + bp->tsu_incr.sub_ns = div_u64(adj, bp->tsu_rate); + } else { + bp->tsu_incr.sub_ns = 0; + } +} + +static void gem_ptp_init_tsu(struct macb *bp) +{ + struct timespec64 ts; + + /* 1. get current system time */ + ts = ns_to_timespec64(ktime_to_ns(ktime_get_real())); + + /* 2. set ptp timer */ + gem_tsu_set_time(&bp->ptp_clock_info, &ts); + + /* 3. set PTP timer increment value to BASE_INCREMENT */ + gem_tsu_incr_set(bp, &bp->tsu_incr); + + gem_writel(bp, TA, 0); +} + +static void gem_ptp_clear_timer(struct macb *bp) +{ + bp->tsu_incr.sub_ns = 0; + bp->tsu_incr.ns = 0; + + gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, 0)); + gem_writel(bp, TI, GEM_BF(NSINCR, 0)); + gem_writel(bp, TA, 0); +} + +static int gem_hw_timestamp(struct macb *bp, u32 dma_desc_ts_1, + u32 dma_desc_ts_2, struct timespec64 *ts) +{ + struct timespec64 tsu; + + ts->tv_sec = (GEM_BFEXT(DMA_SECH, dma_desc_ts_2) << GEM_DMA_SECL_SIZE) | + GEM_BFEXT(DMA_SECL, dma_desc_ts_1); + ts->tv_nsec = GEM_BFEXT(DMA_NSEC, dma_desc_ts_1); + + /* TSU overlapping workaround + * The timestamp only contains lower few bits of seconds, + * so add value from 1588 timer + */ + gem_tsu_get_time(&bp->ptp_clock_info, &tsu); + + /* If the top bit is set in the timestamp, + * but not in 1588 timer, it has rolled over, + * so subtract max size + */ + if ((ts->tv_sec & (GEM_DMA_SEC_TOP >> 1)) && + !(tsu.tv_sec & (GEM_DMA_SEC_TOP >> 1))) + ts->tv_sec -= GEM_DMA_SEC_TOP; + + ts->tv_sec += ((~GEM_DMA_SEC_MASK) & tsu.tv_sec); + + return 0; +} + +void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, + struct macb_dma_desc *desc) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + struct macb_dma_desc_ptp *desc_ptp; + struct timespec64 ts; + + if (GEM_BFEXT(DMA_RXVALID, desc->addr)) { + desc_ptp = macb_ptp_desc(bp, desc); + gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts); + memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + } +} + +static void gem_tstamp_tx(struct macb *bp, struct sk_buff *skb, + struct macb_dma_desc_ptp *desc_ptp) +{ + struct skb_shared_hwtstamps shhwtstamps; + struct timespec64 ts; + + gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts); + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_tstamp_tx(skb, &shhwtstamps); +} + +int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, + struct macb_dma_desc *desc) +{ + unsigned long tail = READ_ONCE(queue->tx_ts_tail); + unsigned long head = queue->tx_ts_head; + struct macb_dma_desc_ptp *desc_ptp; + struct gem_tx_ts *tx_timestamp; + + if (!GEM_BFEXT(DMA_TXVALID, desc->ctrl)) + return -EINVAL; + + if (CIRC_SPACE(head, tail, PTP_TS_BUFFER_SIZE) == 0) + return -ENOMEM; + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + desc_ptp = macb_ptp_desc(queue->bp, desc); + tx_timestamp = &queue->tx_timestamps[head]; + tx_timestamp->skb = skb; + tx_timestamp->desc_ptp.ts_1 = desc_ptp->ts_1; + tx_timestamp->desc_ptp.ts_2 = desc_ptp->ts_2; + /* move head */ + smp_store_release(&queue->tx_ts_head, + (head + 1) & (PTP_TS_BUFFER_SIZE - 1)); + + schedule_work(&queue->tx_ts_task); + return 0; +} + +static void gem_tx_timestamp_flush(struct work_struct *work) +{ + struct macb_queue *queue = + container_of(work, struct macb_queue, tx_ts_task); + unsigned long head, tail; + struct gem_tx_ts *tx_ts; + + /* take current head */ + head = smp_load_acquire(&queue->tx_ts_head); + tail = queue->tx_ts_tail; + + while (CIRC_CNT(head, tail, PTP_TS_BUFFER_SIZE)) { + tx_ts = &queue->tx_timestamps[tail]; + gem_tstamp_tx(queue->bp, tx_ts->skb, &tx_ts->desc_ptp); + /* cleanup */ + dev_kfree_skb_any(tx_ts->skb); + /* remove old tail */ + smp_store_release(&queue->tx_ts_tail, + (tail + 1) & (PTP_TS_BUFFER_SIZE - 1)); + tail = queue->tx_ts_tail; + } +} + +void gem_ptp_init(struct net_device *dev) +{ + struct macb *bp = netdev_priv(dev); + struct macb_queue *queue; + unsigned int q; + + bp->ptp_clock_info = gem_ptp_caps_template; + + /* nominal frequency and maximum adjustment in ppb */ + bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp); + bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj(); + gem_ptp_init_timer(bp); + bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev); + if (IS_ERR(bp->ptp_clock)) { + pr_err("ptp clock register failed: %ld\n", + PTR_ERR(bp->ptp_clock)); + bp->ptp_clock = NULL; + return; + } else if (bp->ptp_clock == NULL) { + pr_err("ptp clock register failed\n"); + return; + } + + spin_lock_init(&bp->tsu_clk_lock); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + queue->tx_ts_head = 0; + queue->tx_ts_tail = 0; + INIT_WORK(&queue->tx_ts_task, gem_tx_timestamp_flush); + } + + gem_ptp_init_tsu(bp); + + dev_info(&bp->pdev->dev, "%s ptp clock registered.\n", + GEM_PTP_TIMER_NAME); +} + +void gem_ptp_remove(struct net_device *ndev) +{ + struct macb *bp = netdev_priv(ndev); + + if (bp->ptp_clock) + ptp_clock_unregister(bp->ptp_clock); + + gem_ptp_clear_timer(bp); + + dev_info(&bp->pdev->dev, "%s ptp clock unregistered.\n", + GEM_PTP_TIMER_NAME); +} + +static int gem_ptp_set_ts_mode(struct macb *bp, + enum macb_bd_control tx_bd_control, + enum macb_bd_control rx_bd_control) +{ + gem_writel(bp, TXBDCTRL, GEM_BF(TXTSMODE, tx_bd_control)); + gem_writel(bp, RXBDCTRL, GEM_BF(RXTSMODE, rx_bd_control)); + + return 0; +} + +int gem_get_hwtst(struct net_device *dev, struct ifreq *rq) +{ + struct hwtstamp_config *tstamp_config; + struct macb *bp = netdev_priv(dev); + + tstamp_config = &bp->tstamp_config; + if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) + return -EOPNOTSUPP; + + if (copy_to_user(rq->ifr_data, tstamp_config, sizeof(*tstamp_config))) + return -EFAULT; + else + return 0; +} + +static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) +{ + u32 reg_val; + + reg_val = macb_readl(bp, NCR); + + if (enable) + macb_writel(bp, NCR, reg_val | MACB_BIT(OSSMODE)); + else + macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE)); + + return 0; +} + +int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + enum macb_bd_control tx_bd_control = TSTAMP_DISABLED; + enum macb_bd_control rx_bd_control = TSTAMP_DISABLED; + struct hwtstamp_config *tstamp_config; + struct macb *bp = netdev_priv(dev); + u32 regval; + + tstamp_config = &bp->tstamp_config; + if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) + return -EOPNOTSUPP; + + if (copy_from_user(tstamp_config, ifr->ifr_data, + sizeof(*tstamp_config))) + return -EFAULT; + + /* reserved for future extensions */ + if (tstamp_config->flags) + return -EINVAL; + + switch (tstamp_config->tx_type) { + case HWTSTAMP_TX_OFF: + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + if (gem_ptp_set_one_step_sync(bp, 1) != 0) + return -ERANGE; + case HWTSTAMP_TX_ON: + tx_bd_control = TSTAMP_ALL_FRAMES; + break; + default: + return -ERANGE; + } + + switch (tstamp_config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + rx_bd_control = TSTAMP_ALL_PTP_FRAMES; + tstamp_config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + regval = macb_readl(bp, NCR); + macb_writel(bp, NCR, (regval | MACB_BIT(SRTSM))); + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_ALL: + rx_bd_control = TSTAMP_ALL_FRAMES; + tstamp_config->rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + tstamp_config->rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } + + if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0) + return -ERANGE; + + if (copy_to_user(ifr->ifr_data, tstamp_config, sizeof(*tstamp_config))) + return -EFAULT; + else + return 0; +} + diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 573755b0a51b..49b80da51ba7 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -227,15 +227,14 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic) nic->speed = mbx.link_status.speed; nic->mac_type = mbx.link_status.mac_type; if (nic->link_up) { - netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n", - nic->netdev->name, nic->speed, + netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n", + nic->speed, nic->duplex == DUPLEX_FULL ? - "Full duplex" : "Half duplex"); + "Full" : "Half"); netif_carrier_on(nic->netdev); netif_tx_start_all_queues(nic->netdev); } else { - netdev_info(nic->netdev, "%s: Link is Down\n", - nic->netdev->name); + netdev_info(nic->netdev, "Link is Down\n"); netif_carrier_off(nic->netdev); netif_tx_stop_all_queues(nic->netdev); } @@ -721,8 +720,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, return; if (netif_msg_pktdata(nic)) { - netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name, - skb, skb->len); + netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); } @@ -854,10 +852,8 @@ done: netif_tx_wake_queue(txq); nic = nic->pnicvf; this_cpu_inc(nic->drv_stats->txq_wake); - if (netif_msg_tx_err(nic)) - netdev_warn(netdev, - "%s: Transmit queue wakeup SQ%d\n", - netdev->name, txq_idx); + netif_warn(nic, tx_err, netdev, + "Transmit queue wakeup SQ%d\n", txq_idx); } } @@ -928,9 +924,8 @@ static void nicvf_handle_qs_err(unsigned long data) static void nicvf_dump_intr_status(struct nicvf *nic) { - if (netif_msg_intr(nic)) - netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n", - nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT)); + netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n", + nicvf_reg_read(nic, NIC_VF_INT)); } static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) @@ -1212,10 +1207,8 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) netif_tx_wake_queue(txq); } else { this_cpu_inc(nic->drv_stats->txq_stop); - if (netif_msg_tx_err(nic)) - netdev_warn(netdev, - "%s: Transmit ring full, stopping SQ%d\n", - netdev->name, qid); + netif_warn(nic, tx_err, netdev, + "Transmit ring full, stopping SQ%d\n", qid); } return NETDEV_TX_BUSY; } @@ -1600,9 +1593,7 @@ static void nicvf_tx_timeout(struct net_device *dev) { struct nicvf *nic = netdev_priv(dev); - if (netif_msg_tx_err(nic)) - netdev_warn(dev, "%s: Transmit timed out, resetting\n", - dev->name); + netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n"); this_cpu_inc(nic->drv_stats->tx_timeout); schedule_work(&nic->reset_task); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 2b181762ad49..d4496e9afcdf 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1811,11 +1811,9 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) /* Check for errors in the receive cmp.queue entry */ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { - if (netif_msg_rx_err(nic)) - netdev_err(nic->netdev, - "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n", - nic->netdev->name, - cqe_rx->err_level, cqe_rx->err_opcode); + netif_err(nic, rx_err, nic->netdev, + "RX error CQE err_level 0x%x err_opcode 0x%x\n", + cqe_rx->err_level, cqe_rx->err_opcode); switch (cqe_rx->err_opcode) { case CQ_RX_ERROP_RE_PARTIAL: diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index dc0850b3b517..8870a9a798ca 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -2,6 +2,7 @@ config FSL_FMAN tristate "FMan support" depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR + depends on HAS_DMA select PHYLIB default n help diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index a79e257bc338..c4b4b0a1bbf0 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1718,7 +1718,7 @@ static int gfar_restore(struct device *dev) return 0; } -static struct dev_pm_ops gfar_pm_ops = { +static const struct dev_pm_ops gfar_pm_ops = { .suspend = gfar_suspend, .resume = gfar_resume, .freeze = gfar_suspend, diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 04211ac73b36..7ba653af19cb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -360,6 +360,7 @@ enum hnae_loop { MAC_INTERNALLOOP_MAC = 0, MAC_INTERNALLOOP_SERDES, MAC_INTERNALLOOP_PHY, + MAC_LOOP_PHY_NONE, MAC_LOOP_NONE, }; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 00e57bbaf122..a8db27e86a11 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -259,67 +259,27 @@ static const char hns_nic_test_strs[][ETH_GSTRING_LEN] = { static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en) { -#define COPPER_CONTROL_REG 0 -#define PHY_POWER_DOWN BIT(11) -#define PHY_LOOP_BACK BIT(14) - u16 val = 0; - - if (phy_dev->is_c45) /* c45 branch adding for XGE PHY */ - return -ENOTSUPP; + int err; if (en) { - /* speed : 1000M */ - phy_write(phy_dev, HNS_PHY_PAGE_REG, 2); - phy_write(phy_dev, 21, 0x1046); - - phy_write(phy_dev, HNS_PHY_PAGE_REG, 0); - /* Force Master */ - phy_write(phy_dev, 9, 0x1F00); - - /* Soft-reset */ - phy_write(phy_dev, 0, 0x9140); - /* If autoneg disabled,two soft-reset operations */ - phy_write(phy_dev, 0, 0x9140); - - phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA); - - /* Default is 0x0400 */ - phy_write(phy_dev, 1, 0x418); - - /* Force 1000M Link, Default is 0x0200 */ - phy_write(phy_dev, 7, 0x20C); - - /* Powerup Fiber */ - phy_write(phy_dev, HNS_PHY_PAGE_REG, 1); - val = phy_read(phy_dev, COPPER_CONTROL_REG); - val &= ~PHY_POWER_DOWN; - phy_write(phy_dev, COPPER_CONTROL_REG, val); - - /* Enable Phy Loopback */ - phy_write(phy_dev, HNS_PHY_PAGE_REG, 0); - val = phy_read(phy_dev, COPPER_CONTROL_REG); - val |= PHY_LOOP_BACK; - val &= ~PHY_POWER_DOWN; - phy_write(phy_dev, COPPER_CONTROL_REG, val); + /* Doing phy loopback in offline state, phy resuming is + * needed to power up the device. + */ + err = phy_resume(phy_dev); + if (err) + goto out; + + err = phy_loopback(phy_dev, true); } else { - phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA); - phy_write(phy_dev, 1, 0x400); - phy_write(phy_dev, 7, 0x200); - - phy_write(phy_dev, HNS_PHY_PAGE_REG, 1); - val = phy_read(phy_dev, COPPER_CONTROL_REG); - val |= PHY_POWER_DOWN; - phy_write(phy_dev, COPPER_CONTROL_REG, val); - - phy_write(phy_dev, HNS_PHY_PAGE_REG, 0); - phy_write(phy_dev, 9, 0xF00); - - val = phy_read(phy_dev, COPPER_CONTROL_REG); - val &= ~PHY_LOOP_BACK; - val |= PHY_POWER_DOWN; - phy_write(phy_dev, COPPER_CONTROL_REG, val); + err = phy_loopback(phy_dev, false); + if (err) + goto out; + + err = phy_suspend(phy_dev); } - return 0; + +out: + return err; } static int __lb_setup(struct net_device *ndev, @@ -332,10 +292,9 @@ static int __lb_setup(struct net_device *ndev, switch (loop) { case MAC_INTERNALLOOP_PHY: - if ((phy_dev) && (!phy_dev->is_c45)) { - ret = hns_nic_config_phy_loopback(phy_dev, 0x1); - ret |= h->dev->ops->set_loopback(h, loop, 0x1); - } + ret = hns_nic_config_phy_loopback(phy_dev, 0x1); + if (!ret) + ret = h->dev->ops->set_loopback(h, loop, 0x1); break; case MAC_INTERNALLOOP_MAC: if ((h->dev->ops->set_loopback) && @@ -346,17 +305,17 @@ static int __lb_setup(struct net_device *ndev, if (h->dev->ops->set_loopback) ret = h->dev->ops->set_loopback(h, loop, 0x1); break; + case MAC_LOOP_PHY_NONE: + ret = hns_nic_config_phy_loopback(phy_dev, 0x0); case MAC_LOOP_NONE: - if ((phy_dev) && (!phy_dev->is_c45)) - ret |= hns_nic_config_phy_loopback(phy_dev, 0x0); - - if (h->dev->ops->set_loopback) { + if (!ret && h->dev->ops->set_loopback) { if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII) - ret |= h->dev->ops->set_loopback(h, + ret = h->dev->ops->set_loopback(h, MAC_INTERNALLOOP_MAC, 0x0); - ret |= h->dev->ops->set_loopback(h, - MAC_INTERNALLOOP_SERDES, 0x0); + if (!ret) + ret = h->dev->ops->set_loopback(h, + MAC_INTERNALLOOP_SERDES, 0x0); } break; default: @@ -582,13 +541,16 @@ static int __lb_run_test(struct net_device *ndev, return ret_val; } -static int __lb_down(struct net_device *ndev) +static int __lb_down(struct net_device *ndev, enum hnae_loop loop) { struct hns_nic_priv *priv = netdev_priv(ndev); struct hnae_handle *h = priv->ae_handle; int ret; - ret = __lb_setup(ndev, MAC_LOOP_NONE); + if (loop == MAC_INTERNALLOOP_PHY) + ret = __lb_setup(ndev, MAC_LOOP_PHY_NONE); + else + ret = __lb_setup(ndev, MAC_LOOP_NONE); if (ret) netdev_err(ndev, "%s: __lb_setup return error(%d)!\n", __func__, @@ -644,7 +606,8 @@ static void hns_nic_self_test(struct net_device *ndev, if (!data[test_index]) { data[test_index] = __lb_run_test( ndev, (enum hnae_loop)st_param[i][0]); - (void)__lb_down(ndev); + (void)__lb_down(ndev, + (enum hnae_loop)st_param[i][0]); } if (data[test_index]) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 9a74c4e2e193..3e0a695537e2 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1914,7 +1914,7 @@ static struct vio_device_id ibmveth_device_table[] = { }; MODULE_DEVICE_TABLE(vio, ibmveth_device_table); -static struct dev_pm_ops ibmveth_pm_ops = { +static const struct dev_pm_ops ibmveth_pm_ops = { .resume = ibmveth_resume }; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 87db1eb5cc44..a3e694679635 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -763,12 +763,6 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (rc) return rc; - rc = init_sub_crq_irqs(adapter); - if (rc) { - netdev_err(netdev, "failed to initialize sub crq irqs\n"); - return -1; - } - rc = init_stats_token(adapter); if (rc) return rc; @@ -1803,7 +1797,6 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) return rc; } - rc = init_sub_crq_irqs(adapter); return rc; } @@ -3669,6 +3662,13 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) if (rc) { dev_err(dev, "Initialization of sub crqs failed\n"); release_crq_queue(adapter); + return rc; + } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, "Failed to initialize sub crq irqs\n"); + release_crq_queue(adapter); } return rc; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c1af47e45d3f..674773b28b2e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_dbg(dev, - "updating vf %d port %d no link state HW enforcment\n", + "updating vf %d port %d no link state HW enforcement\n", vf, port); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 1dae8e40fb25..5f41dc92aa68 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -238,7 +238,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; } - if (mlx4_en_setup_tc(dev, num_tcs)) + if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs)) return 1; return 0; @@ -303,7 +303,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets) int has_ets_tc = 0; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->prio_tc[i] >= MLX4_EN_NUM_UP) { + if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) { en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n", i, ets->prio_tc[i]); return -EINVAL; @@ -472,7 +472,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) goto err; if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) goto err; - if (mlx4_en_setup_tc(dev, 0)) + if (mlx4_en_alloc_tx_queue_per_tc(dev, 0)) goto err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index e97fbf327594..c751a1d434ad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1750,7 +1750,8 @@ static void mlx4_en_get_channels(struct net_device *dev, channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; channel->rx_count = priv->rx_ring_num; - channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP; + channel->tx_count = priv->tx_ring_num[TX] / + priv->prof->num_up; } static int mlx4_en_set_channels(struct net_device *dev, @@ -1763,6 +1764,7 @@ static int mlx4_en_set_channels(struct net_device *dev, int port_up = 0; int xdp_count; int err = 0; + u8 up; if (!channel->tx_count || !channel->rx_count) return -EINVAL; @@ -1773,18 +1775,19 @@ static int mlx4_en_set_channels(struct net_device *dev, mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; - if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) { + if (channel->tx_count * priv->prof->num_up + xdp_count > + MAX_TX_RINGS) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", - channel->tx_count * MLX4_EN_NUM_UP + xdp_count, + channel->tx_count * priv->prof->num_up + xdp_count, MAX_TX_RINGS); goto out; } memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); new_prof.num_tx_rings_p_up = channel->tx_count; - new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP; + new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up; new_prof.tx_ring_num[TX_XDP] = xdp_count; new_prof.rx_ring_num = channel->rx_count; @@ -1799,11 +1802,11 @@ static int mlx4_en_set_channels(struct net_device *dev, mlx4_en_safe_replace_resources(priv, tmp); - netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); - if (netdev_get_num_tc(dev)) - mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); + up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ? + 0 : priv->prof->num_up; + mlx4_en_setup_tc(dev, up); en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]); en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 56cdf38d150e..2b0cbca4beb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -169,8 +169,10 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ppp = pfctx; params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; + params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; + params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up; params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up * - MLX4_EN_NUM_UP; + params->prof[i].num_up; params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 9da76e3be2fc..3a291fc1780a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -60,11 +60,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) int i; unsigned int offset = 0; - if (up && up != MLX4_EN_NUM_UP) + if (up && up != MLX4_EN_NUM_UP_HIGH) return -EINVAL; netdev_set_num_tc(dev, up); - + netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); /* Partition Tx queues evenly amongst UP's */ for (i = 0; i < up; i++) { netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset); @@ -86,6 +86,50 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; + int port_up = 0; + int err = 0; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.num_up = (tc == 0) ? MLX4_EN_NUM_UP_LOW : + MLX4_EN_NUM_UP_HIGH; + new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up * + new_prof.num_up; + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true); + if (err) + goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_safe_replace_resources(priv, tmp); + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port for setup TC\n"); + goto out; + } + } + + err = mlx4_en_setup_tc(dev, tc); +out: + mutex_unlock(&mdev->state_lock); + kfree(tmp); + return err; +} + static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, u32 chain_index, __be16 proto, struct tc_to_netdev *tc) @@ -93,9 +137,12 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, if (tc->type != TC_SETUP_MQPRIO) return -EINVAL; + if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) + return -EINVAL; + tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - return mlx4_en_setup_tc(dev, tc->mqprio->num_tc); + return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc); } #ifdef CONFIG_RFS_ACCEL @@ -2144,7 +2191,7 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config, sizeof(dst->hwtstamp_config)); - dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up; + dst->num_tx_rings_p_up = prof->num_tx_rings_p_up; dst->rx_ring_num = prof->rx_ring_num; dst->flags = prof->flags; dst->mdev = src->mdev; @@ -2197,6 +2244,7 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst, dst->tx_ring[t] = src->tx_ring[t]; dst->tx_cq[t] = src->tx_cq[t]; } + dst->num_tx_rings_p_up = src->num_tx_rings_p_up; dst->rx_ring_num = src->rx_ring_num; memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile)); } @@ -2780,7 +2828,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) { tx_changed = 1; new_prof.tx_ring_num[TX] = - MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP); + MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up); en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n"); } @@ -3271,7 +3319,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->flags |= MLX4_EN_DCB_ENABLED; priv->cee_config.pfc_state = false; - for (i = 0; i < MLX4_EN_NUM_UP; i++) + for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++) priv->cee_config.dcb_pfc[i] = pfc_disabled; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index a6b0db0e0383..86d2d42d658d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -63,7 +63,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; - if (user_prio >= 0) { + /* force user priority per tx ring */ + if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) { context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7d69d939ee2d..4f3a9b27ce4a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -691,15 +691,11 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx4_en_priv *priv = netdev_priv(dev); u16 rings_p_up = priv->num_tx_rings_p_up; - u8 up = 0; if (netdev_get_num_tc(dev)) return skb_tx_hash(dev, skb); - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; - - return fallback(dev, skb) % rings_p_up + up * rings_p_up; + return fallback(dev, skb) % rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, const void *src, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 457e070bca46..a27c9c13a36e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -91,7 +91,7 @@ module_param_array(probe_vf, byte, &probe_vfs_argc, 0444); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n" "probe_vf=port1,port2,port1+2"); -int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; +static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 6ea2b7a0c34d..30616cd0140d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -230,7 +230,6 @@ do { \ #define mlx4_warn(mdev, format, ...) \ dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) -extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; extern int mlx4_internal_err_reset; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 963b77d51b48..d350b2158104 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -115,11 +115,12 @@ #define MLX4_EN_SMALL_PKT_SIZE 64 #define MLX4_EN_MIN_TX_RING_P_UP 1 #define MLX4_EN_MAX_TX_RING_P_UP 32 -#define MLX4_EN_NUM_UP 8 +#define MLX4_EN_NUM_UP_LOW 1 +#define MLX4_EN_NUM_UP_HIGH 8 #define MLX4_EN_DEF_RX_RING_SIZE 1024 #define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ - MLX4_EN_NUM_UP) + MLX4_EN_NUM_UP_HIGH) #define MLX4_EN_DEFAULT_TX_WORK 256 @@ -386,6 +387,7 @@ struct mlx4_en_port_profile { u8 rx_ppp; u8 tx_pause; u8 tx_ppp; + u8 num_up; int rss_rings; int inline_thold; struct hwtstamp_config hwtstamp_config; @@ -485,7 +487,7 @@ enum dcb_pfc_type { struct mlx4_en_cee_config { bool pfc_state; - enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP]; + enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH]; }; #endif @@ -761,6 +763,7 @@ extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); +int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc); #ifdef CONFIG_RFS_ACCEL void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index cf1ef48bfd8d..5aee05992f27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -11,9 +11,13 @@ config MLX5_CORE Core driver for low level functionality of the ConnectX-4 and Connect-IB cards by Mellanox Technologies. +config MLX5_ACCEL + bool + config MLX5_FPGA bool "Mellanox Technologies Innova support" depends on MLX5_CORE + select MLX5_ACCEL ---help--- Build support for the Innova family of network cards by Mellanox Technologies. Innova network cards are comprised of a ConnectX chip @@ -48,3 +52,15 @@ config MLX5_CORE_IPOIB default n ---help--- MLX5 IPoIB offloads & acceleration support. + +config MLX5_EN_IPSEC + bool "IPSec XFRM cryptography-offload accelaration" + depends on MLX5_ACCEL + depends on MLX5_CORE_EN + depends on XFRM_OFFLOAD + depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + default n + ---help--- + Build support for IPsec cryptography-offload accelaration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 5ad093a21a6e..ca367445f864 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -4,9 +4,12 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o + fs_counters.o rl.o lag.o dev.o lib/gid.o -mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o + +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ + fpga/ipsec.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ @@ -16,3 +19,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o + +mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ + en_accel/ipsec_stats.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c new file mode 100644 index 000000000000..53e69edaedde --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "accel/ipsec.h" +#include "mlx5_core.h" +#include "fpga/ipsec.h" + +void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd) +{ + if (!MLX5_IPSEC_DEV(mdev)) + return ERR_PTR(-EOPNOTSUPP); + + return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd); +} + +int mlx5_accel_ipsec_sa_cmd_wait(void *ctx) +{ + return mlx5_fpga_ipsec_sa_cmd_wait(ctx); +} + +u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_device_caps(mdev); +} + +unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_counters_count(mdev); +} + +int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int count) +{ + return mlx5_fpga_ipsec_counters_read(mdev, counters, count); +} + +int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) +{ + return mlx5_fpga_ipsec_init(mdev); +} + +void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ + mlx5_fpga_ipsec_cleanup(mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h new file mode 100644 index 000000000000..d6e20fea9554 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_ACCEL_IPSEC_H__ +#define __MLX5_ACCEL_IPSEC_H__ + +#ifdef CONFIG_MLX5_ACCEL + +#include <linux/mlx5/driver.h> + +enum { + MLX5_ACCEL_IPSEC_DEVICE = BIT(1), + MLX5_ACCEL_IPSEC_IPV6 = BIT(2), + MLX5_ACCEL_IPSEC_ESP = BIT(3), + MLX5_ACCEL_IPSEC_LSO = BIT(4), +}; + +#define MLX5_IPSEC_SADB_IP_AH BIT(7) +#define MLX5_IPSEC_SADB_IP_ESP BIT(6) +#define MLX5_IPSEC_SADB_SA_VALID BIT(5) +#define MLX5_IPSEC_SADB_SPI_EN BIT(4) +#define MLX5_IPSEC_SADB_DIR_SX BIT(3) +#define MLX5_IPSEC_SADB_IPV6 BIT(2) + +enum { + MLX5_IPSEC_CMD_ADD_SA = 0, + MLX5_IPSEC_CMD_DEL_SA = 1, +}; + +enum mlx5_accel_ipsec_enc_mode { + MLX5_IPSEC_SADB_MODE_NONE = 0, + MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1, + MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3, +}; + +#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ + MLX5_ACCEL_IPSEC_DEVICE) + +struct mlx5_accel_ipsec_sa { + __be32 cmd; + u8 key_enc[32]; + u8 key_auth[32]; + __be32 sip[4]; + __be32 dip[4]; + union { + struct { + __be32 reserved; + u8 salt_iv[8]; + __be32 salt; + } __packed gcm; + struct { + u8 salt[16]; + } __packed cbc; + }; + __be32 spi; + __be32 sw_sa_handle; + __be16 tfclen; + u8 enc_mode; + u8 sip_masklen; + u8 dip_masklen; + u8 flags; + u8 reserved[2]; +} __packed; + +/** + * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command + * @mdev: mlx5 device + * @cmd: command to execute + * May be called from atomic context. Returns context pointer, or error + * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic + * context, to cleanup the context pointer + */ +void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd); + +/** + * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion + * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec + * Sleeps (killable) until command execution is complete. + * Returns the command result, or -EINTR if killed + */ +int mlx5_accel_ipsec_sa_cmd_wait(void *context); + +u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); + +unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); +int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int count); + +int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); + +#else + +#define MLX5_IPSEC_DEV(mdev) false + +static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ +} + +#endif + +#endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 4d5bd01f1ebb..f5a2c605749f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -307,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_DESTROY_QP: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -419,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_CREATE_QP: + case MLX5_CMD_OP_FPGA_MODIFY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -585,6 +590,11 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); + MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS); + MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index eef0a50e2388..e1b7ddfecd01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -328,6 +328,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_ENABLED, + MLX5E_SQ_STATE_IPSEC, }; struct mlx5e_sq_wqe_info { @@ -784,6 +785,9 @@ struct mlx5e_priv { const struct mlx5e_profile *profile; void *ppriv; +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_ipsec *ipsec; +#endif }; struct mlx5e_profile { @@ -833,7 +837,6 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); -struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_rx_am(struct mlx5e_rq *rq); void mlx5e_rx_am_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c new file mode 100644 index 000000000000..bac5103efad3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <crypto/internal/geniv.h> +#include <crypto/aead.h> +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/module.h> + +#include "en.h" +#include "accel/ipsec.h" +#include "en_accel/ipsec.h" +#include "en_accel/ipsec_rxtx.h" + +struct mlx5e_ipsec_sa_entry { + struct hlist_node hlist; /* Item in SADB_RX hashtable */ + unsigned int handle; /* Handle in SADB_RX */ + struct xfrm_state *x; + struct mlx5e_ipsec *ipsec; + void *context; +}; + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec, + unsigned int handle) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *ret = NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle) + if (sa_entry->handle == handle) { + ret = sa_entry->x; + xfrm_state_hold(ret); + break; + } + rcu_read_unlock(); + + return ret; +} + +static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + int ret; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL); + if (ret < 0) + goto out; + + sa_entry->handle = ret; + hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle); + ret = 0; + +out: + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); + return ret; +} + +static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + hash_del_rcu(&sa_entry->hlist); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); +} + +static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + unsigned long flags; + + /* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */ + synchronize_rcu(); + spin_lock_irqsave(&ipsec->sadb_rx_lock, flags); + ida_simple_remove(&ipsec->halloc, sa_entry->handle); + spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags); +} + +static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x) +{ + unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4; + + switch (key_len) { + case 16: + return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128; + case 32: + return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128; + default: + netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n", + key_len, x->aead->alg_name); + return -1; + } +} + +static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_ipsec_sa *hw_sa) +{ + struct xfrm_state *x = sa_entry->x; + struct aead_geniv_ctx *geniv_ctx; + unsigned int crypto_data_len; + struct crypto_aead *aead; + unsigned int key_len; + int ivsize; + + memset(hw_sa, 0, sizeof(*hw_sa)); + + if (op == MLX5_IPSEC_CMD_ADD_SA) { + crypto_data_len = (x->aead->alg_key_len + 7) / 8; + key_len = crypto_data_len - 4; /* 4 bytes salt at end */ + aead = x->data; + geniv_ctx = crypto_aead_ctx(aead); + ivsize = crypto_aead_ivsize(aead); + + memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len); + /* Duplicate 128 bit key twice according to HW layout */ + if (key_len == 16) + memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len); + memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize); + hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len)); + } + + hw_sa->cmd = htonl(op); + hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN; + if (x->props.family == AF_INET) { + hw_sa->sip[3] = x->props.saddr.a4; + hw_sa->dip[3] = x->id.daddr.a4; + hw_sa->sip_masklen = 32; + hw_sa->dip_masklen = 32; + } else { + memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip)); + memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip)); + hw_sa->sip_masklen = 128; + hw_sa->dip_masklen = 128; + hw_sa->flags |= MLX5_IPSEC_SADB_IPV6; + } + hw_sa->spi = x->id.spi; + hw_sa->sw_sa_handle = htonl(sa_entry->handle); + switch (x->id.proto) { + case IPPROTO_ESP: + hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP; + break; + case IPPROTO_AH: + hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH; + break; + default: + break; + } + hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x); + if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) + hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX; +} + +static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) +{ + struct net_device *netdev = x->xso.dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + + if (x->props.aalgo != SADB_AALG_NONE) { + netdev_info(netdev, "Cannot offload authenticated xfrm states\n"); + return -EINVAL; + } + if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { + netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + netdev_info(netdev, "Cannot offload compressed xfrm states\n"); + return -EINVAL; + } + if (x->props.flags & XFRM_STATE_ESN) { + netdev_info(netdev, "Cannot offload ESN xfrm states\n"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + netdev_info(netdev, "Only ESP xfrm state may be offloaded\n"); + return -EINVAL; + } + if (x->encap) { + netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n"); + return -EINVAL; + } + if (!x->aead) { + netdev_info(netdev, "Cannot offload xfrm states without aead\n"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + return -EINVAL; + } + if (x->tfcpad) { + netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n"); + return -EINVAL; + } + if (!x->geniv) { + netdev_info(netdev, "Cannot offload xfrm states without geniv\n"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n"); + return -EINVAL; + } + if (x->props.family == AF_INET6 && + !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) { + netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); + return -EINVAL; + } + return 0; +} + +static int mlx5e_xfrm_add_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = NULL; + struct net_device *netdev = x->xso.dev; + struct mlx5_accel_ipsec_sa hw_sa; + struct mlx5e_priv *priv; + void *context; + int err; + + priv = netdev_priv(netdev); + + err = mlx5e_xfrm_validate_state(x); + if (err) + return err; + + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!sa_entry) { + err = -ENOMEM; + goto out; + } + + sa_entry->x = x; + sa_entry->ipsec = priv->ipsec; + + /* Add the SA to handle processed incoming packets before the add SA + * completion was received + */ + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { + err = mlx5e_ipsec_sadb_rx_add(sa_entry); + if (err) { + netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err); + goto err_entry; + } + } + + mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa); + context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa); + if (IS_ERR(context)) { + err = PTR_ERR(context); + goto err_sadb_rx; + } + + err = mlx5_accel_ipsec_sa_cmd_wait(context); + if (err) + goto err_sadb_rx; + + x->xso.offload_handle = (unsigned long)sa_entry; + goto out; + +err_sadb_rx: + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) { + mlx5e_ipsec_sadb_rx_del(sa_entry); + mlx5e_ipsec_sadb_rx_free(sa_entry); + } +err_entry: + kfree(sa_entry); +out: + return err; +} + +static void mlx5e_xfrm_del_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5_accel_ipsec_sa hw_sa; + void *context; + + if (!x->xso.offload_handle) + return; + + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + WARN_ON(sa_entry->x != x); + + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + mlx5e_ipsec_sadb_rx_del(sa_entry); + + mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa); + context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa); + if (IS_ERR(context)) + return; + + sa_entry->context = context; +} + +static void mlx5e_xfrm_free_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry; + int res; + + if (!x->xso.offload_handle) + return; + + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + WARN_ON(sa_entry->x != x); + + res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context); + sa_entry->context = NULL; + if (res) { + /* Leftover object will leak */ + return; + } + + if (x->xso.flags & XFRM_OFFLOAD_INBOUND) + mlx5e_ipsec_sadb_rx_free(sa_entry); + + kfree(sa_entry); +} + +int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = NULL; + + if (!MLX5_IPSEC_DEV(priv->mdev)) { + netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); + return 0; + } + + ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); + if (!ipsec) + return -ENOMEM; + + hash_init(ipsec->sadb_rx); + spin_lock_init(&ipsec->sadb_rx_lock); + ida_init(&ipsec->halloc); + ipsec->en_priv = priv; + ipsec->en_priv->ipsec = ipsec; + netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); + return 0; +} + +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (!ipsec) + return; + + ida_destroy(&ipsec->halloc); + kfree(ipsec); + priv->ipsec = NULL; +} + +static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + + return true; +} + +static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = mlx5e_xfrm_add_state, + .xdo_dev_state_delete = mlx5e_xfrm_del_state, + .xdo_dev_state_free = mlx5e_xfrm_free_state, + .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, +}; + +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *netdev = priv->netdev; + + if (!priv->ipsec) + return; + + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) || + !MLX5_CAP_ETH(mdev, swp)) { + mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); + return; + } + + mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; + netdev->features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; + + if (!MLX5_CAP_ETH(mdev, swp_csum)) { + mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); + return; + } + + netdev->features |= NETIF_F_HW_ESP_TX_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; + + if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) || + !MLX5_CAP_ETH(mdev, swp_lso)) { + mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); + return; + } + + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); + netdev->features |= NETIF_F_GSO_ESP; + netdev->hw_features |= NETIF_F_GSO_ESP; + netdev->hw_enc_features |= NETIF_F_GSO_ESP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h new file mode 100644 index 000000000000..56e00baf16cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_H__ +#define __MLX5E_IPSEC_H__ + +#ifdef CONFIG_MLX5_EN_IPSEC + +#include <linux/mlx5/device.h> +#include <net/xfrm.h> +#include <linux/idr.h> + +#define MLX5E_IPSEC_SADB_RX_BITS 10 +#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) +#define MLX5E_METADATA_ETHER_LEN 8 + +struct mlx5e_priv; + +struct mlx5e_ipsec_sw_stats { + atomic64_t ipsec_rx_drop_sp_alloc; + atomic64_t ipsec_rx_drop_sadb_miss; + atomic64_t ipsec_rx_drop_syndrome; + atomic64_t ipsec_tx_drop_bundle; + atomic64_t ipsec_tx_drop_no_state; + atomic64_t ipsec_tx_drop_not_ip; + atomic64_t ipsec_tx_drop_trailer; + atomic64_t ipsec_tx_drop_metadata; +}; + +struct mlx5e_ipsec_stats { + u64 ipsec_dec_in_packets; + u64 ipsec_dec_out_packets; + u64 ipsec_dec_bypass_packets; + u64 ipsec_enc_in_packets; + u64 ipsec_enc_out_packets; + u64 ipsec_enc_bypass_packets; + u64 ipsec_dec_drop_packets; + u64 ipsec_dec_auth_fail_packets; + u64 ipsec_enc_drop_packets; + u64 ipsec_add_sa_success; + u64 ipsec_add_sa_fail; + u64 ipsec_del_sa_success; + u64 ipsec_del_sa_fail; + u64 ipsec_cmd_drop; +}; + +struct mlx5e_ipsec { + struct mlx5e_priv *en_priv; + DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); + spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */ + struct ida halloc; + struct mlx5e_ipsec_sw_stats sw_stats; + struct mlx5e_ipsec_stats stats; +}; + +void mlx5e_ipsec_build_inverse_table(void); +int mlx5e_ipsec_init(struct mlx5e_priv *priv); +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); + +int mlx5e_ipsec_get_count(struct mlx5e_priv *priv); +int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data); +void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv); +int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data); + +struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev, + unsigned int handle); + +#else + +static inline void mlx5e_ipsec_build_inverse_table(void) +{ +} + +static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ +} + +static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, + uint8_t *data) +{ + return 0; +} + +static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} + +#endif + +#endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c new file mode 100644 index 000000000000..4a78aefdf157 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <crypto/aead.h> +#include <net/xfrm.h> +#include <net/esp.h> + +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ipsec.h" +#include "en.h" + +enum { + MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11, + MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12, +}; + +struct mlx5e_ipsec_rx_metadata { + unsigned char reserved; + __be32 sa_handle; +} __packed; + +enum { + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, +}; + +struct mlx5e_ipsec_tx_metadata { + __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */ + __be16 seq; /* LSBs of the first TCP seq, only for LSO */ + u8 esp_next_proto; /* Next protocol of ESP */ +} __packed; + +struct mlx5e_ipsec_metadata { + unsigned char syndrome; + union { + unsigned char raw[5]; + /* from FPGA to host, on successful decrypt */ + struct mlx5e_ipsec_rx_metadata rx; + /* from host to FPGA */ + struct mlx5e_ipsec_tx_metadata tx; + } __packed content; + /* packet type ID field */ + __be16 ethertype; +} __packed; + +#define MAX_LSO_MSS 2048 + +/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */ +static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS]; + +static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb) +{ + return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size]; +} + +static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb) +{ + struct mlx5e_ipsec_metadata *mdata; + struct ethhdr *eth; + + if (unlikely(skb_cow_head(skb, sizeof(*mdata)))) + return ERR_PTR(-ENOMEM); + + eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata)); + skb->mac_header -= sizeof(*mdata); + mdata = (struct mlx5e_ipsec_metadata *)(eth + 1); + + memmove(skb->data, skb->data + sizeof(*mdata), + 2 * ETH_ALEN); + + eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); + + memset(mdata->content.raw, 0, sizeof(mdata->content.raw)); + return mdata; +} + +static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) +{ + unsigned int alen = crypto_aead_authsize(x->data); + struct ipv6hdr *ipv6hdr = ipv6_hdr(skb); + struct iphdr *ipv4hdr = ip_hdr(skb); + unsigned int trailer_len; + u8 plen; + int ret; + + ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1); + if (unlikely(ret)) + return ret; + + trailer_len = alen + plen + 2; + + pskb_trim(skb, skb->len - trailer_len); + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); + } else { + ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) - + trailer_len); + } + return 0; +} + +static void mlx5e_ipsec_set_swp(struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u8 mode, + struct xfrm_offload *xo) +{ + u8 proto; + + /* Tunnel Mode: + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP IP L4 + * + * Transport Mode: + * SWP: OutL3 InL4 + * InL3 + * Pkt: MAC IP ESP L4 + * + * Offsets are in 2-byte words, counting from start of frame + */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + + if (mode == XFRM_MODE_TUNNEL) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (xo->proto == IPPROTO_IPV6) { + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + proto = inner_ip_hdr(skb)->protocol; + } + } else { + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + proto = xo->proto; + } + switch (proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* Fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo) +{ + int iv_offset; + __be64 seqno; + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, + struct mlx5e_ipsec_metadata *mdata, + struct xfrm_offload *xo) +{ + struct ip_esp_hdr *esph; + struct tcphdr *tcph; + + if (skb_is_gso(skb)) { + /* Add LSO metadata indication */ + esph = ip_esp_hdr(skb); + tcph = inner_tcp_hdr(skb); + netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n", + skb->network_header, + skb->transport_header, + skb->inner_network_header, + skb->inner_transport_header); + netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n", + skb->len, skb_shinfo(skb)->gso_size, + ntohs(tcph->source), ntohs(tcph->dest), + ntohl(tcph->seq), ntohl(esph->seq_no)); + mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP; + mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb); + mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF); + } else { + mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD; + } + mdata->content.tx.esp_next_proto = xo->proto; + + netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n", + mdata->syndrome, mdata->content.tx.esp_next_proto, + ntohs(mdata->content.tx.mss_inv), + ntohs(mdata->content.tx.seq)); +} + +struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct mlx5e_tx_wqe *wqe, + struct sk_buff *skb) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_metadata *mdata; + struct xfrm_state *x; + + if (!xo) + return skb; + + if (unlikely(skb->sp->len != 1)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); + goto drop; + } + + x = xfrm_input_state(skb); + if (unlikely(!x)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state); + goto drop; + } + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip); + goto drop; + } + + if (!skb_is_gso(skb)) + if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); + goto drop; + } + mdata = mlx5e_ipsec_add_metadata(skb); + if (unlikely(IS_ERR(mdata))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); + goto drop; + } + mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo); + mlx5e_ipsec_set_iv(skb, xo); + mlx5e_ipsec_set_metadata(skb, mdata, xo); + + return skb; + +drop: + kfree_skb(skb); + return NULL; +} + +static inline struct xfrm_state * +mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, + struct mlx5e_ipsec_metadata *mdata) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo; + struct xfrm_state *xs; + u32 sa_handle; + + skb->sp = secpath_dup(skb->sp); + if (unlikely(!skb->sp)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); + return NULL; + } + + sa_handle = be32_to_cpu(mdata->content.rx.sa_handle); + xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle); + if (unlikely(!xs)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return NULL; + } + + skb->sp->xvec[skb->sp->len++] = xs; + skb->sp->olen++; + + xo = xfrm_offload(skb); + xo->flags = CRYPTO_DONE; + switch (mdata->syndrome) { + case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED: + xo->status = CRYPTO_SUCCESS; + break; + case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED: + xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; + break; + default: + atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); + return NULL; + } + return xs; +} + +struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb) +{ + struct mlx5e_ipsec_metadata *mdata; + struct ethhdr *old_eth; + struct ethhdr *new_eth; + struct xfrm_state *xs; + __be16 *ethtype; + + /* Detect inline metadata */ + if (skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN) + return skb; + ethtype = (__be16 *)(skb->data + ETH_ALEN * 2); + if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE)) + return skb; + + /* Use the metadata */ + mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN); + xs = mlx5e_ipsec_build_sp(netdev, skb, mdata); + if (unlikely(!xs)) { + kfree_skb(skb); + return NULL; + } + + /* Remove the metadata from the buffer */ + old_eth = (struct ethhdr *)skb->data; + new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN); + memmove(new_eth, old_eth, 2 * ETH_ALEN); + /* Ethertype is already in its new place */ + skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN); + + return skb; +} + +bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features) +{ + struct xfrm_state *x; + + if (skb->sp && skb->sp->len) { + x = skb->sp->xvec[0]; + if (x && x->xso.offload_handle) + return true; + } + return false; +} + +void mlx5e_ipsec_build_inverse_table(void) +{ + u16 mss_inv; + u32 mss; + + /* Calculate 1/x inverse table for use in GSO data path. + * Using this table, we provide the IPSec accelerator with the value of + * 1/gso_size so that it can infer the position of each segment inside + * the GSO, and increment the ESP sequence number, and generate the IV. + * The HW needs this value in Q0.16 fixed-point number format + */ + mlx5e_ipsec_inverse_table[1] = htons(0xFFFF); + for (mss = 2; mss < MAX_LSO_MSS; mss++) { + mss_inv = ((1ULL << 32) / mss) >> 16; + mlx5e_ipsec_inverse_table[mss] = htons(mss_inv); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h new file mode 100644 index 000000000000..e37ae2598dbb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_RXTX_H__ +#define __MLX5E_IPSEC_RXTX_H__ + +#ifdef CONFIG_MLX5_EN_IPSEC + +#include <linux/skbuff.h> +#include "en.h" + +struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb); +void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +void mlx5e_ipsec_inverse_table_init(void); +bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features); +struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct mlx5e_tx_wqe *wqe, + struct sk_buff *skb); + +#endif /* CONFIG_MLX5_EN_IPSEC */ + +#endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c new file mode 100644 index 000000000000..6fea59223dc4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/ethtool.h> +#include <net/sock.h> + +#include "en.h" +#include "accel/ipsec.h" +#include "fpga/sdk.h" +#include "en_accel/ipsec.h" + +static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) }, +}; + +static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc) +#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) + +#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS) + +int mlx5e_ipsec_get_count(struct mlx5e_priv *priv) +{ + if (!priv->ipsec) + return 0; + + return NUM_IPSEC_COUNTERS; +} + +int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, idx = 0; + + if (!priv->ipsec) + return 0; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_hw_stats_desc[i].format); + + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_sw_stats_desc[i].format); + + return NUM_IPSEC_COUNTERS; +} + +void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv) +{ + int ret; + + if (!priv->ipsec) + return; + + ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats, + NUM_IPSEC_HW_COUNTERS); + if (ret) + memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats)); +} + +int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + int i, idx = 0; + + if (!priv->ipsec) + return 0; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats, + mlx5e_ipsec_hw_stats_desc, i); + + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats, + mlx5e_ipsec_sw_stats_desc, i); + + return NUM_IPSEC_COUNTERS; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 16b1e96a7050..917fade5f5d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -31,6 +31,7 @@ */ #include "en.h" +#include "en_accel/ipsec.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -186,7 +187,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + ARRAY_SIZE(mlx5e_pme_status_desc) + - ARRAY_SIZE(mlx5e_pme_error_desc); + ARRAY_SIZE(mlx5e_pme_error_desc) + + mlx5e_ipsec_get_count(priv); case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(mlx5e_priv_flags); @@ -275,6 +277,9 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + /* IPSec counters */ + idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; @@ -403,6 +408,9 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, mlx5e_pme_error_desc, i); + /* IPSec counters */ + idx += mlx5e_ipsec_get_stats(priv, data + idx); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9f99f624004f..1eac5003084f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,6 +39,9 @@ #include "en.h" #include "en_tc.h" #include "en_rep.h" +#include "en_accel/ipsec.h" +#include "en_accel/ipsec_rxtx.h" +#include "accel/ipsec.h" #include "vxlan.h" struct mlx5e_rq_param { @@ -115,7 +118,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && - !params->xdp_prog ? + !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; mlx5e_set_rq_type_params(mdev, params, rq_type); @@ -328,8 +331,10 @@ static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) void mlx5e_update_stats(struct mlx5e_priv *priv, bool full) { - if (full) + if (full) { mlx5e_update_pcie_counters(priv); + mlx5e_ipsec_update_stats(priv); + } mlx5e_update_pport_counters(priv, full); mlx5e_update_vport_counters(priv); mlx5e_update_q_counter(priv); @@ -592,6 +597,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; +#ifdef CONFIG_MLX5_EN_IPSEC + if (MLX5_IPSEC_DEV(mdev)) { + err = -EINVAL; + netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n"); + goto err_rq_wq_destroy; + } +#endif if (!rq->handle_rx_cqe) { err = -EINVAL; netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err); @@ -624,7 +636,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; +#ifdef CONFIG_MLX5_EN_IPSEC + if (c->priv->ipsec) + rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe; + else +#endif + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; if (!rq->handle_rx_cqe) { kfree(rq->wqe.frag_info); err = -EINVAL; @@ -635,6 +652,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->buff.wqe_sz = params->lro_en ? params->lro_wqe_sz : MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); +#ifdef CONFIG_MLX5_EN_IPSEC + if (MLX5_IPSEC_DEV(mdev)) + rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN; +#endif rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en; byte_count = rq->buff.wqe_sz; @@ -1095,6 +1116,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->max_inline = params->tx_max_inline; sq->min_inline_mode = params->tx_min_inline_mode; + if (MLX5_IPSEC_DEV(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); @@ -1914,6 +1937,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev)); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -3070,8 +3094,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); - stats->tx_carrier_errors = - PPORT_802_3_GET(pstats, a_symbol_error_during_carrier); stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + stats->rx_frame_errors; stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; @@ -3508,6 +3530,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, features = vlan_features_check(skb, features); features = vxlan_features_check(skb, features); +#ifdef CONFIG_MLX5_EN_IPSEC + if (mlx5e_ipsec_feature_check(skb, netdev, features)) + return features; +#endif + /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) @@ -3555,6 +3582,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) goto unlock; } + if ((netdev->features & NETIF_F_HW_ESP) && prog) { + netdev_warn(netdev, "can't set XDP with IPSec offload\n"); + err = -EINVAL; + goto unlock; + } + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); @@ -4046,6 +4079,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (MLX5_CAP_GEN(mdev, vport_group_manager)) netdev->switchdev_ops = &mlx5e_switchdev_ops; #endif + + mlx5e_ipsec_build_netdev(priv); } static void mlx5e_create_q_counter(struct mlx5e_priv *priv) @@ -4074,14 +4109,19 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + int err; mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + err = mlx5e_ipsec_init(priv); + if (err) + mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); mlx5e_build_nic_netdev(netdev); mlx5e_vxlan_init(priv); } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + mlx5e_ipsec_cleanup(priv); mlx5e_vxlan_cleanup(priv); if (priv->channels.params.xdp_prog) @@ -4473,6 +4513,7 @@ static struct mlx5_interface mlx5e_interface = { void mlx5e_init(void) { + mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); mlx5_register_interface(&mlx5e_interface); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5f3c138c948d..325b2c8c1c6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -41,6 +41,7 @@ #include "eswitch.h" #include "en_rep.h" #include "ipoib/ipoib.h" +#include "en_accel/ipsec_rxtx.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -996,7 +997,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); for (; work_done < budget; work_done++) { - struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq); if (!cqe) break; @@ -1050,7 +1051,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) u16 wqe_counter; bool last_wqe; - cqe = mlx5e_get_cqe(cq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); if (!cqe) break; @@ -1183,3 +1184,43 @@ wq_free_wqe: } #endif /* CONFIG_MLX5_CORE_IPOIB */ + +#ifdef CONFIG_MLX5_EN_IPSEC + +void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_wqe_frag_info *wi; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (unlikely(!skb)) { + /* a DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi); + goto wq_ll_pop; + } + skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb); + if (unlikely(!skb)) { + mlx5e_free_rx_wqe(rq, wi); + goto wq_ll_pop; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + + mlx5e_free_rx_wqe_reuse(rq, wi); +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +#endif /* CONFIG_MLX5_EN_IPSEC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 0433d69429f3..aaa0f4ebba9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -34,6 +34,7 @@ #include <linux/if_vlan.h> #include "en.h" #include "ipoib/ipoib.h" +#include "en_accel/ipsec_rxtx.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ @@ -299,12 +300,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, } } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) +static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_tx_wqe *wqe, u16 pi) { - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; @@ -319,8 +317,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) u16 ds_cnt; u16 ihs; - memset(wqe, 0, sizeof(*wqe)); - mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); if (skb_is_gso(skb)) { @@ -375,8 +371,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + memset(wqe, 0, sizeof(*wqe)); + +#ifdef CONFIG_MLX5_EN_IPSEC + if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) { + skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb); + if (unlikely(!skb)) + return NETDEV_TX_OK; + } +#endif - return mlx5e_sq_xmit(sq, skb); + return mlx5e_sq_xmit(sq, skb, wqe, pi); } bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) @@ -409,7 +418,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) u16 wqe_counter; bool last_wqe; - cqe = mlx5e_get_cqe(cq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); if (!cqe) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 5ca6714e3e02..92db28a9ed43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -32,23 +32,6 @@ #include "en.h" -struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) -{ - struct mlx5_cqwq *wq = &cq->wq; - u32 ci = mlx5_cqwq_get_ci(wq); - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); - u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; - u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; - - if (cqe_ownership_bit != sw_ownership_val) - return NULL; - - /* ensure cqe content is read after cqe ownership bit */ - dma_rmb(); - - return cqe; -} - static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, struct mlx5e_icosq *sq, struct mlx5_cqe64 *cqe, @@ -89,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return; - cqe = mlx5e_get_cqe(cq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); if (likely(!cqe)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c index 99cba644b4fc..e37453d838db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -33,10 +33,44 @@ #include <linux/etherdevice.h> #include <linux/mlx5/cmd.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> #include "mlx5_core.h" #include "fpga/cmd.h" +#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \ + MLX5_FPGA_ACCESS_REG_SIZE_MAX) + +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write) +{ + u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0}; + u32 out[MLX5_FPGA_ACCESS_REG_SZ]; + int err; + + if (size & 3) + return -EINVAL; + if (addr & 3) + return -EINVAL; + if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX) + return -EINVAL; + + MLX5_SET(fpga_access_reg, in, size, size); + MLX5_SET64(fpga_access_reg, in, address, addr); + if (write) + memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_ACCESS_REG, 0, write); + if (err) + return err; + + if (!write) + memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size); + + return 0; +} + int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps) { u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; @@ -46,6 +80,49 @@ int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps) MLX5_REG_FPGA_CAP, 0, 0); } +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + + MLX5_SET(fpga_ctrl, in, operation, op); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, true); +} + +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size) +{ + unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len); + u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr); + unsigned int read; + int ret = 0; + + if (cap_size > size) { + mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u", + size, cap_size); + return -EINVAL; + } + + while (cap_size > 0) { + read = min_t(unsigned int, cap_size, + MLX5_FPGA_ACCESS_REG_SIZE_MAX); + + ret = mlx5_fpga_access_reg(dev, read, addr, caps, false); + if (ret) { + mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d", + read, addr, ret); + return ret; + } + + cap_size -= read; + addr += read; + caps += read; + } + + return ret; +} + int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query) { u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; @@ -62,3 +139,100 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query) query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper); return 0; } + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)]; + int ret; + + MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP); + memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc)); + *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn); + return ret; +} + +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, + void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)]; + + MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP); + MLX5_SET(fpga_modify_qp_in, in, field_select, fields); + MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn); + memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, + u32 fpga_qpn, void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)]; + int ret; + + MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP); + MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc)); + return ret; +} + +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)]; + + MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP); + MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data) +{ + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)]; + int ret; + + MLX5_SET(fpga_query_qp_counters_in, in, opcode, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS); + MLX5_SET(fpga_query_qp_counters_in, in, clear, clear); + MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_ack_packets); + data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_send_packets); + data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_ack_packets); + data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_send_packets); + data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_total_drop); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h index a74396a61bc3..94bdfd47c3f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -53,7 +53,32 @@ struct mlx5_fpga_query { enum mlx5_fpga_status status; }; +enum mlx5_fpga_qpc_field_select { + MLX5_FPGA_QPC_STATE = BIT(0), +}; + +struct mlx5_fpga_qp_counters { + u64 rx_ack_packets; + u64 rx_send_packets; + u64 tx_ack_packets; + u64 tx_send_packets; + u64 rx_total_drop; +}; + int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps); int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write); +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size); + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn); +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc); +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc); +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data); +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn); #endif /* __MLX5_FPGA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c new file mode 100644 index 000000000000..c4392f741c5f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -0,0 +1,1042 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <net/addrconf.h> +#include <linux/etherdevice.h> +#include <linux/mlx5/vport.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/conn.h" + +#define MLX5_FPGA_PKEY 0xFFFF +#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */ +#define MLX5_FPGA_RECV_SIZE 2048 +#define MLX5_FPGA_PORT_NUM 1 +#define MLX5_FPGA_CQ_BUDGET 64 + +static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + int err = 0; + + if (unlikely(!buf->sg[0].data)) + goto out; + + dma_device = &conn->fdev->mdev->pdev->dev; + buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, + buf->sg[0].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err); + err = -ENOMEM; + goto out; + } + + if (!buf->sg[1].data) + goto out; + + buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data, + buf->sg[1].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[1].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err); + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); + err = -ENOMEM; + } + +out: + return err; +} + +static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + + dma_device = &conn->fdev->mdev->pdev->dev; + if (buf->sg[1].data) + dma_unmap_single(dma_device, buf->sg[1].dma_addr, + buf->sg[1].size, buf->dma_dir); + + if (likely(buf->sg[0].data)) + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); +} + +static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_data_seg *data; + unsigned int ix; + int err = 0; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (unlikely(err)) + goto out; + + if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) { + mlx5_fpga_conn_unmap_buf(conn, buf); + return -EBUSY; + } + + ix = conn->qp.rq.pc & (conn->qp.rq.size - 1); + data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix); + data->byte_count = cpu_to_be32(buf->sg[0].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->addr = cpu_to_be64(buf->sg[0].dma_addr); + + conn->qp.rq.pc++; + conn->qp.rq.bufs[ix] = buf; + + /* Make sure that descriptors are written before doorbell record. */ + dma_wmb(); + *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff); +out: + return err; +} + +static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc); + /* Make sure that doorbell record is visible before ringing */ + wmb(); + mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL); +} + +static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_data_seg *data; + unsigned int ix, sgi; + int size = 1; + + ix = conn->qp.sq.pc & (conn->qp.sq.size - 1); + + ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix); + data = (void *)(ctrl + 1); + + for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) { + if (!buf->sg[sgi].data) + break; + data->byte_count = cpu_to_be32(buf->sg[sgi].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key); + data->addr = cpu_to_be64(buf->sg[sgi].dma_addr); + data++; + size++; + } + + ctrl->imm = 0; + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) | + MLX5_OPCODE_SEND); + ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8)); + + conn->qp.sq.pc++; + conn->qp.sq.bufs[ix] = buf; + mlx5_fpga_conn_notify_hw(conn, ctrl); +} + +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + unsigned long flags; + int err; + + if (!conn->qp.active) + return -ENOTCONN; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (err) + return err; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) { + list_add_tail(&buf->list, &conn->qp.sq.backlog); + goto out_unlock; + } + + mlx5_fpga_conn_post_send(conn, buf); + +out_unlock: + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + return err; +} + +static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf; + int err; + + buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0); + if (!buf) + return -ENOMEM; + + buf->sg[0].data = (void *)(buf + 1); + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + buf->dma_dir = DMA_FROM_DEVICE; + + err = mlx5_fpga_conn_post_recv(conn, buf); + if (err) + kfree(buf); + + return err; +} + +static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf; + int ix, err; + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1); + buf = conn->qp.rq.bufs[ix]; + conn->qp.rq.bufs[ix] = NULL; + if (!status) + buf->sg[0].size = be32_to_cpu(cqe->byte_cnt); + conn->qp.rq.cc++; + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (unlikely(status || !conn->qp.active)) { + conn->qp.active = false; + kfree(buf); + return; + } + + mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n", + buf->sg[0].size); + conn->recv_cb(conn->cb_arg, buf); + + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + err = mlx5_fpga_conn_post_recv(conn, buf); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, + "Failed to re-post recv buf: %d\n", err); + kfree(buf); + } +} + +static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf, *nextbuf; + unsigned long flags; + int ix; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1); + buf = conn->qp.sq.bufs[ix]; + conn->qp.sq.bufs[ix] = NULL; + conn->qp.sq.cc++; + + /* Handle backlog still under the spinlock to ensure message post order */ + if (unlikely(!list_empty(&conn->qp.sq.backlog))) { + if (likely(conn->qp.active)) { + nextbuf = list_first_entry(&conn->qp.sq.backlog, + struct mlx5_fpga_dma_buf, list); + list_del(&nextbuf->list); + mlx5_fpga_conn_post_send(conn, nextbuf); + } + } + + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (likely(buf->complete)) + buf->complete(conn, conn->fdev, buf, status); + + if (unlikely(status)) + conn->qp.active = false; +} + +static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe) +{ + u8 opcode, status = 0; + + opcode = cqe->op_own >> 4; + + switch (opcode) { + case MLX5_CQE_REQ_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + /* Fall through */ + case MLX5_CQE_REQ: + mlx5_fpga_conn_sq_cqe(conn, cqe, status); + break; + + case MLX5_CQE_RESP_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + /* Fall through */ + case MLX5_CQE_RESP_SEND: + mlx5_fpga_conn_rq_cqe(conn, cqe, status); + break; + default: + mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n", + opcode); + } +} + +static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn) +{ + mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT, + conn->fdev->conn_res.uar->map, conn->cq.wq.cc); +} + +static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq, + enum mlx5_event event) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn); +} + +static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp); + mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn); +} + +static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, + unsigned int budget) +{ + struct mlx5_cqe64 *cqe; + + while (budget) { + cqe = mlx5_cqwq_get_cqe(&conn->cq.wq); + if (!cqe) + break; + + budget--; + mlx5_cqwq_pop(&conn->cq.wq); + mlx5_fpga_conn_handle_cqe(conn, cqe); + mlx5_cqwq_update_db_record(&conn->cq.wq); + } + if (!budget) { + tasklet_schedule(&conn->cq.tasklet); + return; + } + + mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc); + /* ensure cq space is freed before enabling more cqes */ + wmb(); + mlx5_fpga_conn_arm_cq(conn); +} + +static void mlx5_fpga_conn_cq_tasklet(unsigned long data) +{ + struct mlx5_fpga_conn *conn = (void *)data; + + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + int inlen, err, eqn; + unsigned int irqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq_size = roundup_pow_of_two(cq_size); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq, + &conn->cq.wq_ctrl); + if (err) + return err; + + for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) { + cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_cqwq; + } + + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + if (err) + goto err_cqwq; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas); + + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + kvfree(in); + + if (err) + goto err_cqwq; + + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + *conn->cq.mcq.arm_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + conn->cq.mcq.event = mlx5_fpga_conn_cq_event; + conn->cq.mcq.irqn = irqn; + conn->cq.mcq.uar = fdev->conn_res.uar; + tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet, + (unsigned long)conn); + + mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); + + goto out; + +err_cqwq: + mlx5_cqwq_destroy(&conn->cq.wq_ctrl); +out: + return err; +} + +static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn) +{ + tasklet_disable(&conn->cq.tasklet); + tasklet_kill(&conn->cq.tasklet); + mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq); + mlx5_cqwq_destroy(&conn->cq.wq_ctrl); +} + +static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + struct mlx5_wq_param wqp; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq, + &conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn, + unsigned int tx_size, unsigned int rx_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0}; + void *in = NULL, *qpc; + int err, inlen; + + conn->qp.rq.pc = 0; + conn->qp.rq.cc = 0; + conn->qp.rq.size = roundup_pow_of_two(rx_size); + conn->qp.sq.pc = 0; + conn->qp.sq.cc = 0; + conn->qp.sq.size = roundup_pow_of_two(tx_size); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size)); + err = mlx5_fpga_conn_create_wq(conn, temp_qpc); + if (err) + goto out; + + conn->qp.rq.bufs = kvzalloc(sizeof(conn->qp.rq.bufs[0]) * + conn->qp.rq.size, GFP_KERNEL); + if (!conn->qp.rq.bufs) { + err = -ENOMEM; + goto err_wq; + } + + conn->qp.sq.bufs = kvzalloc(sizeof(conn->qp.sq.bufs[0]) * + conn->qp.sq.size, GFP_KERNEL); + if (!conn->qp.sq.bufs) { + err = -ENOMEM; + goto err_rq_bufs; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + conn->qp.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_sq_bufs; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, + conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size)); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + + mlx5_fill_page_array(&conn->qp.wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas)); + + err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen); + if (err) + goto err_sq_bufs; + + conn->qp.mqp.event = mlx5_fpga_conn_event; + mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn); + + goto out; + +err_sq_bufs: + kvfree(conn->qp.sq.bufs); +err_rq_bufs: + kvfree(conn->qp.rq.bufs); +err_wq: + mlx5_wq_destroy(&conn->qp.wq_ctrl); +out: + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn) +{ + int ix; + + for (ix = 0; ix < conn->qp.rq.size; ix++) { + if (!conn->qp.rq.bufs[ix]) + continue; + mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]); + kfree(conn->qp.rq.bufs[ix]); + conn->qp.rq.bufs[ix] = NULL; + } +} + +static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf, *temp; + int ix; + + for (ix = 0; ix < conn->qp.sq.size; ix++) { + buf = conn->qp.sq.bufs[ix]; + if (!buf) + continue; + conn->qp.sq.bufs[ix] = NULL; + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } + list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) { + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } +} + +static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn) +{ + mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp); + mlx5_fpga_conn_free_recv_bufs(conn); + mlx5_fpga_conn_flush_send_bufs(conn); + kvfree(conn->qp.sq.bufs); + kvfree(conn->qp.rq.bufs); + mlx5_wq_destroy(&conn->qp.wq_ctrl); +} + +static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *mdev = conn->fdev->mdev; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, + &conn->qp.mqp); +} + +static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + int err; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + int err; + + mlx5_fpga_dbg(conn->fdev, "QP RTR\n"); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg)); + MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn); + MLX5_SET(qpc, qpc, next_rcv_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + conn->qp.sgid_index); + MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip), + MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc = NULL; + u32 opt_mask; + int err; + + mlx5_fpga_dbg(conn->fdev, "QP RTS\n"); + + qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL); + if (!qpc) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpc, qpc, log_ack_req_freq, 8); + MLX5_SET(qpc, qpc, min_rnr_nak, 0x12); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */ + MLX5_SET(qpc, qpc, next_send_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn)); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ + + opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT; + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc, + &conn->qp.mqp); + if (err) { + mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err); + goto out; + } + +out: + kfree(qpc); + return err; +} + +static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + int err; + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE); + err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc); + if (err) { + mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err); + goto out; + } + + err = mlx5_fpga_conn_reset_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state to reset\n"); + goto err_fpga_qp; + } + + err = mlx5_fpga_conn_init_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n"); + goto err_fpga_qp; + } + conn->qp.active = true; + + while (!mlx5_fpga_conn_post_recv_buf(conn)) + ; + + err = mlx5_fpga_conn_rtr_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n"); + goto err_recv_bufs; + } + + err = mlx5_fpga_conn_rts_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n"); + goto err_recv_bufs; + } + goto out; + +err_recv_bufs: + mlx5_fpga_conn_free_recv_bufs(conn); +err_fpga_qp: + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc)) + mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n"); +out: + return err; +} + +struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type) +{ + struct mlx5_fpga_conn *ret, *conn; + u8 *remote_mac, *remote_ip; + int err; + + if (!attr->recv_cb) + return ERR_PTR(-EINVAL); + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) + return ERR_PTR(-ENOMEM); + + conn->fdev = fdev; + INIT_LIST_HEAD(&conn->qp.sq.backlog); + + spin_lock_init(&conn->qp.sq.lock); + + conn->recv_cb = attr->recv_cb; + conn->cb_arg = attr->cb_arg; + + remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); + err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + if (err) { + mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + /* Build Modified EUI-64 IPv6 address from the MAC address */ + remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip); + remote_ip[0] = 0xfe; + remote_ip[1] = 0x80; + addrconf_addr_eui48(&remote_ip[8], remote_mac); + + err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index); + if (err) { + mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, + MLX5_ROCE_VERSION_2, + MLX5_ROCE_L3_TYPE_IPV6, + remote_ip, remote_mac, true, 0); + if (err) { + mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); + ret = ERR_PTR(err); + goto err_rsvd_gid; + } + mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index); + + /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe, + * created during processing of the cqe + */ + err = mlx5_fpga_conn_create_cq(conn, + (attr->tx_size + attr->rx_size) * 2); + if (err) { + mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err); + ret = ERR_PTR(err); + goto err_gid; + } + + mlx5_fpga_conn_arm_cq(conn); + + err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size); + if (err) { + mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err); + ret = ERR_PTR(err); + goto err_cq; + } + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type); + MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC); + MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q); + MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY); + MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn); + MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7); + MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7); + + err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc, + &conn->fpga_qpn); + if (err) { + mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err); + ret = ERR_PTR(err); + goto err_qp; + } + + err = mlx5_fpga_conn_connect(conn); + if (err) { + ret = ERR_PTR(err); + goto err_conn; + } + + mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn); + ret = conn; + goto out; + +err_conn: + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); +err_qp: + mlx5_fpga_conn_destroy_qp(conn); +err_cq: + mlx5_fpga_conn_destroy_cq(conn); +err_gid: + mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, + NULL, false, 0); +err_rsvd_gid: + mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); +err: + kfree(conn); +out: + return ret; +} + +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + int err = 0; + + conn->qp.active = false; + tasklet_disable(&conn->cq.tasklet); + synchronize_irq(conn->cq.mcq.irqn); + + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL, + &conn->qp.mqp); + if (err) + mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err); + mlx5_fpga_conn_destroy_qp(conn); + mlx5_fpga_conn_destroy_cq(conn); + + mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, + NULL, NULL, false, 0); + mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); + kfree(conn); +} + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev) +{ + int err; + + err = mlx5_nic_vport_enable_roce(fdev->mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err); + goto out; + } + + fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev); + if (IS_ERR(fdev->conn_res.uar)) { + err = PTR_ERR(fdev->conn_res.uar); + mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err); + goto err_roce; + } + mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n", + fdev->conn_res.uar->index); + + err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn); + if (err) { + mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err); + goto err_uar; + } + mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn); + + err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn, + &fdev->conn_res.mkey); + if (err) { + mlx5_fpga_err(fdev, "create mkey failed, %d\n", err); + goto err_dealloc_pd; + } + mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key); + + return 0; + +err_dealloc_pd: + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); +err_uar: + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); +err_roce: + mlx5_nic_vport_disable_roce(fdev->mdev); +out: + return err; +} + +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev) +{ + mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey); + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); + mlx5_nic_vport_disable_roce(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h new file mode 100644 index 000000000000..44bd9eccc711 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_CONN_H__ +#define __MLX5_FPGA_CONN_H__ + +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> + +#include "fpga/core.h" +#include "fpga/sdk.h" +#include "wq.h" + +struct mlx5_fpga_conn { + struct mlx5_fpga_device *fdev; + + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; + + /* FPGA QP */ + u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)]; + u32 fpga_qpn; + + /* CQ */ + struct { + struct mlx5_cqwq wq; + struct mlx5_frag_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct tasklet_struct tasklet; + } cq; + + /* QP */ + struct { + bool active; + int sgid_index; + struct mlx5_wq_qp wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_qp mqp; + struct { + spinlock_t lock; /* Protects all SQ state */ + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + struct list_head backlog; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + } rq; + } qp; +}; + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev); +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev); +struct mlx5_fpga_conn * +mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type); +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn); +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +#endif /* __MLX5_FPGA_CONN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index d88b332e9669..31e5a2627eb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -35,7 +35,9 @@ #include <linux/mlx5/driver.h> #include "mlx5_core.h" +#include "lib/mlx5.h" #include "fpga/core.h" +#include "fpga/conn.h" static const char *const mlx5_fpga_error_strings[] = { "Null Syndrome", @@ -100,10 +102,34 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) return 0; } +int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) +{ + int err; + struct mlx5_core_dev *mdev = fdev->mdev; + + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); + if (err) { + mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); + return err; + } + return 0; +} + int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; unsigned long flags; + unsigned int max_num_qps; int err; if (!fdev) @@ -123,6 +149,28 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) mlx5_fpga_image_name(fdev->last_oper_image), MLX5_CAP_FPGA(fdev->mdev, image_version)); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + err = mlx5_core_reserve_gids(mdev, max_num_qps); + if (err) + goto out; + + err = mlx5_fpga_conn_device_init(fdev); + if (err) + goto err_rsvd_gid; + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_device_brb(fdev); + if (err) + goto err_conn_init; + } + + goto out; + +err_conn_init: + mlx5_fpga_conn_device_cleanup(fdev); + +err_rsvd_gid: + mlx5_core_unreserve_gids(mdev, max_num_qps); out: spin_lock_irqsave(&fdev->state_lock, flags); fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; @@ -130,7 +178,7 @@ out: return err; } -int mlx5_fpga_device_init(struct mlx5_core_dev *mdev) +int mlx5_fpga_init(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = NULL; @@ -151,9 +199,42 @@ int mlx5_fpga_device_init(struct mlx5_core_dev *mdev) return 0; } -void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev) +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + int err; + + if (!fdev) + return; + + spin_lock_irqsave(&fdev->state_lock, flags); + if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { + spin_unlock_irqrestore(&fdev->state_lock, flags); + return; + } + fdev->state = MLX5_FPGA_STATUS_NONE; + spin_unlock_irqrestore(&fdev->state_lock, flags); + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) + mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", + err); + } + + mlx5_fpga_conn_device_cleanup(fdev); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + mlx5_core_unreserve_gids(mdev, max_num_qps); +} + +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) { - kfree(mdev->fpga); + struct mlx5_fpga_device *fdev = mdev->fpga; + + mlx5_fpga_device_stop(mdev); + kfree(fdev); mdev->fpga = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index c55044d66778..82405ed84725 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -44,6 +44,15 @@ struct mlx5_fpga_device { enum mlx5_fpga_status state; enum mlx5_fpga_image last_admin_image; enum mlx5_fpga_image last_oper_image; + + /* QP Connection resources */ + struct { + u32 pdn; + struct mlx5_core_mkey mkey; + struct mlx5_uars_page *uar; + } conn_res; + + struct mlx5_fpga_ipsec *ipsec; }; #define mlx5_fpga_dbg(__adev, format, ...) \ @@ -68,19 +77,20 @@ struct mlx5_fpga_device { #define mlx5_fpga_info(__adev, format, ...) \ dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__) -int mlx5_fpga_device_init(struct mlx5_core_dev *mdev); -void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev); +int mlx5_fpga_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); #else -static inline int mlx5_fpga_device_init(struct mlx5_core_dev *mdev) +static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev) { return 0; } -static inline void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev) +static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) { } @@ -89,6 +99,10 @@ static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) return 0; } +static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ +} + static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c new file mode 100644 index 000000000000..42970e2a05ff --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "fpga/ipsec.h" +#include "fpga/sdk.h" +#include "fpga/core.h" + +#define SBU_QP_QUEUE_SIZE 8 + +enum mlx5_ipsec_response_syndrome { + MLX5_IPSEC_RESPONSE_SUCCESS = 0, + MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, + MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2, + MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, +}; + +enum mlx5_fpga_ipsec_sacmd_status { + MLX5_FPGA_IPSEC_SACMD_PENDING, + MLX5_FPGA_IPSEC_SACMD_SEND_FAIL, + MLX5_FPGA_IPSEC_SACMD_COMPLETE, +}; + +struct mlx5_ipsec_command_context { + struct mlx5_fpga_dma_buf buf; + struct mlx5_accel_ipsec_sa sa; + enum mlx5_fpga_ipsec_sacmd_status status; + int status_code; + struct completion complete; + struct mlx5_fpga_device *dev; + struct list_head list; /* Item in pending_cmds */ +}; + +struct mlx5_ipsec_sadb_resp { + __be32 syndrome; + __be32 sw_sa_handle; + u8 reserved[24]; +} __packed; + +struct mlx5_fpga_ipsec { + struct list_head pending_cmds; + spinlock_t pending_cmds_lock; /* Protects pending_cmds */ + u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; + struct mlx5_fpga_conn *conn; +}; + +static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) +{ + if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) + return false; + + if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) + return false; + + if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC) + return false; + + return true; +} + +static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, + u8 status) +{ + struct mlx5_ipsec_command_context *context; + + if (status) { + context = container_of(buf, struct mlx5_ipsec_command_context, + buf); + mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", + status); + context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL; + complete(&context->complete); + } +} + +static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome) +{ + switch (syndrome) { + case MLX5_IPSEC_RESPONSE_SUCCESS: + return 0; + case MLX5_IPSEC_RESPONSE_SADB_ISSUE: + return -EEXIST; + case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST: + return -EINVAL; + case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: + return -EIO; + } + return -EIO; +} + +static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data; + struct mlx5_ipsec_command_context *context; + enum mlx5_ipsec_response_syndrome syndrome; + struct mlx5_fpga_device *fdev = cb_arg; + unsigned long flags; + + if (buf->sg[0].size < sizeof(*resp)) { + mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n", + buf->sg[0].size, sizeof(*resp)); + return; + } + + mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n", + ntohl(resp->syndrome), ntohl(resp->sw_sa_handle)); + + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, + struct mlx5_ipsec_command_context, + list); + if (context) + list_del(&context->list); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + + if (!context) { + mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n"); + return; + } + mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); + + if (context->sa.sw_sa_handle != resp->sw_sa_handle) { + mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n", + ntohl(context->sa.sw_sa_handle), + ntohl(resp->sw_sa_handle)); + return; + } + + syndrome = ntohl(resp->syndrome); + context->status_code = syndrome_to_errno(syndrome); + context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE; + + if (context->status_code) + mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n", + syndrome); + complete(&context->complete); +} + +void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd) +{ + struct mlx5_ipsec_command_context *context; + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned long flags; + int res = 0; + + BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0); + if (!fdev || !fdev->ipsec) + return ERR_PTR(-EOPNOTSUPP); + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return ERR_PTR(-ENOMEM); + + memcpy(&context->sa, cmd, sizeof(*cmd)); + context->buf.complete = mlx5_fpga_ipsec_send_complete; + context->buf.sg[0].size = sizeof(context->sa); + context->buf.sg[0].data = &context->sa; + init_completion(&context->complete); + context->dev = fdev; + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + list_add_tail(&context->list, &fdev->ipsec->pending_cmds); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + + context->status = MLX5_FPGA_IPSEC_SACMD_PENDING; + + res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); + if (res) { + mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n", + res); + spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); + list_del(&context->list); + spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); + kfree(context); + return ERR_PTR(res); + } + /* Context will be freed by wait func after completion */ + return context; +} + +int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx) +{ + struct mlx5_ipsec_command_context *context = ctx; + int res; + + res = wait_for_completion_killable(&context->complete); + if (res) { + mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); + return -EINTR; + } + + if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE) + res = context->status_code; + else + res = -EIO; + + kfree(context); + return res; +} + +u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + u32 ret = 0; + + if (mlx5_fpga_is_ipsec_device(mdev)) + ret |= MLX5_ACCEL_IPSEC_DEVICE; + else + return ret; + + if (!fdev->ipsec) + return ret; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) + ret |= MLX5_ACCEL_IPSEC_ESP; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) + ret |= MLX5_ACCEL_IPSEC_IPV6; + + if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) + ret |= MLX5_ACCEL_IPSEC_LSO; + + return ret; +} + +unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!fdev || !fdev->ipsec) + return 0; + + return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + number_of_ipsec_counters); +} + +int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int counters_count) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int i; + u32 *data; + u32 count; + u64 addr; + int ret; + + if (!fdev || !fdev->ipsec) + return 0; + + addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + ipsec_counters_addr_low) + + ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, + ipsec_counters_addr_high) << 32); + + count = mlx5_fpga_ipsec_counters_count(mdev); + + data = kzalloc(sizeof(u32) * count * 2, GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto out; + } + + ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data, + MLX5_FPGA_ACCESS_TYPE_DONTCARE); + if (ret < 0) { + mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n", + ret); + goto out; + } + ret = 0; + + if (count > counters_count) + count = counters_count; + + /* Each counter is low word, then high. But each word is big-endian */ + for (i = 0; i < count; i++) + counters[i] = (u64)ntohl(data[i * 2]) | + ((u64)ntohl(data[i * 2 + 1]) << 32); + +out: + kfree(data); + return ret; +} + +int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_conn_attr init_attr = {0}; + struct mlx5_fpga_device *fdev = mdev->fpga; + struct mlx5_fpga_conn *conn; + int err; + + if (!mlx5_fpga_is_ipsec_device(mdev)) + return 0; + + fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL); + if (!fdev->ipsec) + return -ENOMEM; + + err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), + fdev->ipsec->caps); + if (err) { + mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n", + err); + goto error; + } + + INIT_LIST_HEAD(&fdev->ipsec->pending_cmds); + spin_lock_init(&fdev->ipsec->pending_cmds_lock); + + init_attr.rx_size = SBU_QP_QUEUE_SIZE; + init_attr.tx_size = SBU_QP_QUEUE_SIZE; + init_attr.recv_cb = mlx5_fpga_ipsec_recv; + init_attr.cb_arg = fdev; + conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); + if (IS_ERR(conn)) { + err = PTR_ERR(conn); + mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n", + err); + goto error; + } + fdev->ipsec->conn = conn; + return 0; + +error: + kfree(fdev->ipsec); + fdev->ipsec = NULL; + return err; +} + +void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + if (!mlx5_fpga_is_ipsec_device(mdev)) + return; + + mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); + kfree(fdev->ipsec); + fdev->ipsec = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h new file mode 100644 index 000000000000..26a3e4b56972 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_IPSEC_H__ +#define __MLX5_FPGA_IPSEC_H__ + +#include "accel/ipsec.h" + +#ifdef CONFIG_MLX5_FPGA + +void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd); +int mlx5_fpga_ipsec_sa_cmd_wait(void *context); + +u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); +unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); +int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, + unsigned int counters_count); + +int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev); + +#else + +static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev, + struct mlx5_accel_ipsec_sa *cmd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context) +{ + return -EOPNOTSUPP; +} + +static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline unsigned int +mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, + u64 *counters) +{ + return 0; +} + +static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) +{ +} + +#endif /* CONFIG_MLX5_FPGA */ + +#endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c new file mode 100644 index 000000000000..3c11d6e2160a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/mlx5/device.h> + +#include "fpga/core.h" +#include "fpga/conn.h" +#include "fpga/sdk.h" + +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr) +{ + return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create); + +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn) +{ + mlx5_fpga_conn_destroy(conn); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy); + +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + return mlx5_fpga_conn_send(conn, buf); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg); + +static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, false); + if (err) { + mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n", + err); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, true); + if (err) { + mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n"); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_read); + +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_write); + +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf) +{ + return mlx5_fpga_sbu_caps(fdev->mdev, buf, size); +} +EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h new file mode 100644 index 000000000000..baa537e54a49 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MLX5_FPGA_SDK_H +#define MLX5_FPGA_SDK_H + +#include <linux/types.h> +#include <linux/dma-direction.h> + +/** + * DOC: Innova SDK + * This header defines the in-kernel API for Innova FPGA client drivers. + */ + +enum mlx5_fpga_access_type { + MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, + MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, +}; + +struct mlx5_fpga_conn; +struct mlx5_fpga_device; + +/** + * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry + */ +struct mlx5_fpga_dma_entry { + /** @data: Virtual address pointer to the data */ + void *data; + /** @size: Size in bytes of the data */ + unsigned int size; + /** @dma_addr: Private member. Physical DMA-mapped address of the data */ + dma_addr_t dma_addr; +}; + +/** + * struct mlx5_fpga_dma_buf - A packet buffer + * May contain up to 2 scatter-gather data entries + */ +struct mlx5_fpga_dma_buf { + /** @dma_dir: DMA direction */ + enum dma_data_direction dma_dir; + /** @sg: Scatter-gather entries pointing to the data in memory */ + struct mlx5_fpga_dma_entry sg[2]; + /** @list: Item in SQ backlog, for TX packets */ + struct list_head list; + /** + * @complete: Completion routine, for TX packets + * @conn: FPGA Connection this packet was sent to + * @fdev: FPGA device this packet was sent to + * @buf: The packet buffer + * @status: 0 if successful, or an error code otherwise + */ + void (*complete)(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, u8 status); +}; + +/** + * struct mlx5_fpga_conn_attr - FPGA connection attributes + * Describes the attributes of a connection + */ +struct mlx5_fpga_conn_attr { + /** @tx_size: Size of connection TX queue, in packets */ + unsigned int tx_size; + /** @rx_size: Size of connection RX queue, in packets */ + unsigned int rx_size; + /** + * @recv_cb: Callback function which is called for received packets + * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg + * @buf: A buffer containing a received packet + * + * buf is guaranteed to only contain a single scatter-gather entry. + * The size of the actual packet received is specified in buf.sg[0].size + * When this callback returns, the packet buffer may be re-used for + * subsequent receives. + */ + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; +}; + +/** + * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection + * @fdev: The FPGA device + * @attr: Attributes of the new connection + * + * Sets up a new FPGA SBU connection with the specified attributes. + * The receive callback function may be called for incoming messages even + * before this function returns. + * + * The caller must eventually destroy the connection by calling + * mlx5_fpga_sbu_conn_destroy. + * + * Return: A new connection, or ERR_PTR() error value otherwise. + */ +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr); + +/** + * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection + * @conn: The FPGA SBU connection to destroy + * + * Cleans up an FPGA SBU connection which was previously created with + * mlx5_fpga_sbu_conn_create. + */ +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); + +/** + * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet + * @fdev: An FPGA SBU connection + * @buf: The packet buffer + * + * Queues a packet for transmission over an FPGA SBU connection. + * The buffer should not be modified or freed until completion. + * Upon completion, the buf's complete() callback is invoked, indicating the + * success or error status of the transmission. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +/** + * mlx5_fpga_mem_read() - Read from FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to read, in bytes + * @addr: Starting address to read from, in FPGA address space + * @buf: Buffer to read into + * @access_type: Method for reading + * + * Reads from the specified address into the specified buffer. + * The address may point to configuration space or to DDR. + * Large reads may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_mem_write() - Write to FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to write, in bytes + * @addr: Starting address to write to, in FPGA address space + * @buf: Buffer which contains data to write + * @access_type: Method for writing + * + * Writes the specified buffer data to FPGA memory at the specified address. + * The address may point to configuration space or to DDR. + * Large writes may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities + * @fdev: The FPGA device + * @size: Size of the buffer to read into + * @buf: Buffer to read the capabilities into + * + * Reads the FPGA SBU capabilities into the specified buffer. + * The format of the capabilities buffer is SBU-dependent. + * + * Return: 0 if successful + * -EINVAL if the buffer is not large enough to contain SBU caps + * or any other error value otherwise. + */ +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf); + +#endif /* MLX5_FPGA_SDK_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 0648a659b21d..4b6b03d6297f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -67,6 +67,7 @@ enum { enum { MLX5_DROP_NEW_HEALTH_WORK, + MLX5_DROP_NEW_RECOVERY_WORK, }; static u8 get_nic_state(struct mlx5_core_dev *dev) @@ -194,7 +195,7 @@ static void health_care(struct work_struct *work) mlx5_handle_bad_state(dev); spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) schedule_delayed_work(&health->recover_work, recover_delay); else dev_err(&dev->pdev->dev, @@ -322,6 +323,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) init_timer(&health->timer); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -345,11 +347,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); cancel_delayed_work_sync(&health->recover_work); cancel_work_sync(&health->work); } +void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + spin_lock(&health->wq_lock); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + spin_unlock(&health->wq_lock); + cancel_delayed_work_sync(&dev->priv.health.recover_work); +} + void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c new file mode 100644 index 000000000000..de2aed44ab85 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include "mlx5_core.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev) +{ + unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size); + + ida_init(&dev->roce.reserved_gids.ida); + dev->roce.reserved_gids.start = tblsz; + dev->roce.reserved_gids.count = 0; +} + +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev) +{ + WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida)); + dev->roce.reserved_gids.start = 0; + dev->roce.reserved_gids.count = 0; + ida_destroy(&dev->roce.reserved_gids.ida); +} + +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n"); + return -EPERM; + } + if (dev->roce.reserved_gids.start < count) { + mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n", + count); + return -ENOMEM; + } + if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) { + mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count); + return -ENOMEM; + } + + dev->roce.reserved_gids.start -= count; + dev->roce.reserved_gids.count += count; + mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); + return 0; +} + +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up"); + WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved", + count, dev->roce.reserved_gids.count); + + dev->roce.reserved_gids.start += count; + dev->roce.reserved_gids.count -= count; + mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); +} + +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) +{ + int end = dev->roce.reserved_gids.start + + dev->roce.reserved_gids.count; + int index = 0; + + index = ida_simple_get(&dev->roce.reserved_gids.ida, + dev->roce.reserved_gids.start, end, + GFP_KERNEL); + if (index < 0) + return index; + + mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index); + *gid_index = index; + return 0; +} + +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index) +{ + mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index); + ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index); +} + +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev) +{ + return dev->roce.reserved_gids.count; +} +EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); + +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_l3_address); + void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_mac_47_32); + int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address); + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EINVAL; + + if (gid) { + if (vlan) { + MLX5_SET_RA(in_addr, vlan_valid, 1); + MLX5_SET_RA(in_addr, vlan_id, vlan_id); + } + + ether_addr_copy(addr_mac, mac); + MLX5_SET_RA(in_addr, roce_version, roce_version); + MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); + memcpy(addr_l3_addr, gid, gidsz); + } + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_roce_gid_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h new file mode 100644 index 000000000000..7550b1cc8c6a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_MLX5_H__ +#define __LIB_MLX5_H__ + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c7f75e12c13b..c065132b956d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -56,7 +56,9 @@ #ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" #endif +#include "lib/mlx5.h" #include "fpga/core.h" +#include "accel/ipsec.h" MODULE_AUTHOR("Eli Cohen <[email protected]>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -936,6 +938,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_mkey_table(dev); + mlx5_init_reserved_gids(dev); + err = mlx5_init_rl_table(dev); if (err) { dev_err(&pdev->dev, "Failed to init rate limiting\n"); @@ -956,8 +960,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_eswitch_cleanup; } + err = mlx5_fpga_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init fpga device %d\n", err); + goto err_sriov_cleanup; + } + return 0; +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_eswitch_cleanup: #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); @@ -981,11 +993,13 @@ out: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_fpga_cleanup(dev); mlx5_sriov_cleanup(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); #endif mlx5_cleanup_rl_table(dev); + mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1020,7 +1034,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (err) { dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); - goto out; + goto out_err; } err = mlx5_cmd_init(dev); @@ -1117,16 +1131,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_disable_msix; } - err = mlx5_fpga_device_init(dev); - if (err) { - dev_err(&pdev->dev, "fpga device init failed %d\n", err); - goto err_put_uars; - } - err = mlx5_start_eqs(dev); if (err) { dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_fpga_init; + goto err_put_uars; } err = alloc_comp_eqs(dev); @@ -1160,7 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); - goto err_reg_dev; + goto err_fpga_start; + } + err = mlx5_accel_ipsec_init(dev); + if (err) { + dev_err(&pdev->dev, "IPSec device start failed %d\n", err); + goto err_ipsec_start; } if (mlx5_device_registered(dev)) { @@ -1181,6 +1194,11 @@ out: return 0; err_reg_dev: + mlx5_accel_ipsec_cleanup(dev); +err_ipsec_start: + mlx5_fpga_device_stop(dev); + +err_fpga_start: mlx5_sriov_detach(dev); err_sriov: @@ -1198,9 +1216,6 @@ err_affinity_hints: err_stop_eqs: mlx5_stop_eqs(dev); -err_fpga_init: - mlx5_fpga_device_cleanup(dev); - err_put_uars: mlx5_put_uars_page(dev, priv->uar); @@ -1243,7 +1258,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, int err = 0; if (cleanup) - mlx5_drain_health_wq(dev); + mlx5_drain_health_recovery(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { @@ -1254,9 +1269,15 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto out; } + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); + if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_accel_ipsec_cleanup(dev); + mlx5_fpga_device_stop(dev); + mlx5_sriov_detach(dev); #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_detach(dev->priv.eswitch); @@ -1265,7 +1286,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); - mlx5_fpga_device_cleanup(dev); mlx5_put_uars_page(dev, priv->uar); mlx5_disable_msix(dev); if (cleanup) @@ -1282,8 +1302,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_cmd_cleanup(dev); out: - clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); mutex_unlock(&dev->intf_state_mutex); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 06019d00ab7b..5abfec1c3399 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -926,12 +926,16 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) { + if (atomic_inc_return(&mdev->roce.roce_en) != 1) + return 0; return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) { + if (atomic_dec_return(&mdev->roce.roce_en) != 0) + return 0; return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 921673c42bc9..6bcfc25350f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -54,6 +54,12 @@ static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) return mlx5_wq_cyc_get_size(wq) << wq->log_stride; } +static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) +{ + return mlx5_wq_cyc_get_byte_size(&wq->rq) + + mlx5_wq_cyc_get_byte_size(&wq->sq); +} + static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) { return mlx5_cqwq_get_size(wq) << wq->log_stride; @@ -99,6 +105,46 @@ err_db_free: return err; } +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + int err; + + wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + wq->rq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1; + + wq->sq.log_stride = ilog2(MLX5_SEND_WQE_BB); + wq->sq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + wq->rq.buf = wq_ctrl->buf.direct.buf; + wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq); + wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; + wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_frag_wq_ctrl *wq_ctrl) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index d8afed898c31..718589d0cec2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -34,6 +34,8 @@ #define __MLX5_WQ_H__ #include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> struct mlx5_wq_param { int linear; @@ -60,6 +62,11 @@ struct mlx5_wq_cyc { u8 log_stride; }; +struct mlx5_wq_qp { + struct mlx5_wq_cyc rq; + struct mlx5_wq_cyc sq; +}; + struct mlx5_cqwq { struct mlx5_frag_buf frag_buf; __be32 *db; @@ -87,6 +94,10 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl); + int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_frag_wq_ctrl *wq_ctrl); @@ -146,6 +157,22 @@ static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq) *wq->db = cpu_to_be32(wq->cc & 0xffffff); } +static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq) +{ + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + + if (cqe_ownership_bit != sw_ownership_val) + return NULL; + + /* ensure cqe content is read after cqe ownership bit */ + dma_rmb(); + + return cqe; +} + static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) { return wq->cur_sz == wq->sz_m1; diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 9ca85383aa35..7a712b6b09ec 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -96,7 +96,7 @@ struct mlxfw_dev { u16 psid_size; }; -#if IS_ENABLED(CONFIG_MLXFW) +#if IS_REACHABLE(CONFIG_MLXFW) int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, const struct firmware *firmware); #else diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 700cc8c6aa5b..192cb93e7669 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3301,6 +3301,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); + if (netif_is_bridge_port(vlan_dev)) + return 0; + if (mlxsw_sp_port_dev_check(real_dev)) return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, event, vid); diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index 0d5a7b9203a4..0e331e2f685a 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -25,6 +25,16 @@ config NFP cards working as a advanced Ethernet NIC. It works with both SR-IOV physical and virtual functions. +config NFP_APP_FLOWER + bool "NFP4000/NFP6000 TC Flower offload support" + depends on NFP + depends on NET_SWITCHDEV + ---help--- + Enable driver support for TC Flower offload on NFP4000 and NFP6000. + Say Y, if you are planning to make use of TC Flower offload + either directly, with Open vSwitch, or any other way. Note that + TC Flower offload requires specific FW to work. + config NFP_DEBUG bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers" depends on NFP diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 10b556b2c59d..b8e1358868bd 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -27,9 +27,17 @@ nfp-objs := \ nfp_port.o \ bpf/main.o \ bpf/offload.o \ + nic/main.o + +ifeq ($(CONFIG_NFP_APP_FLOWER),y) +nfp-objs += \ + flower/action.o \ flower/cmsg.o \ flower/main.o \ - nic/main.o + flower/match.o \ + flower/metadata.o \ + flower/offload.o +endif ifeq ($(CONFIG_BPF_SYSCALL),y) nfp-objs += \ diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c new file mode 100644 index 000000000000..db9750695dc7 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bitfield.h> +#include <net/pkt_cls.h> +#include <net/switchdev.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" + +static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) +{ + size_t act_size = sizeof(struct nfp_fl_pop_vlan); + u16 tmp_pop_vlan_op; + + tmp_pop_vlan_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN); + + pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op); + pop_vlan->reserved = 0; +} + +static void +nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, + const struct tc_action *action) +{ + size_t act_size = sizeof(struct nfp_fl_push_vlan); + struct tcf_vlan *vlan = to_vlan(action); + u16 tmp_push_vlan_tci; + u16 tmp_push_vlan_op; + + tmp_push_vlan_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN); + + push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op); + /* Set action push vlan parameters. */ + push_vlan->reserved = 0; + push_vlan->vlan_tpid = tcf_vlan_push_proto(action); + + tmp_push_vlan_tci = + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) | + NFP_FL_PUSH_VLAN_CFI; + push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); +} + +static int +nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, + struct nfp_fl_payload *nfp_flow, bool last, + struct net_device *in_dev) +{ + size_t act_size = sizeof(struct nfp_fl_output); + struct net_device *out_dev; + u16 tmp_output_op; + int ifindex; + + /* Set action opcode to output action. */ + tmp_output_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT); + + output->a_op = cpu_to_be16(tmp_output_op); + + /* Set action output parameters. */ + output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0); + + ifindex = tcf_mirred_ifindex(action); + out_dev = __dev_get_by_index(dev_net(in_dev), ifindex); + if (!out_dev) + return -EOPNOTSUPP; + + /* Only offload egress ports are on the same device as the ingress + * port. + */ + if (!switchdev_port_same_parent_id(in_dev, out_dev)) + return -EOPNOTSUPP; + + output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); + if (!output->port) + return -EOPNOTSUPP; + + nfp_flow->meta.shortcut = output->port; + + return 0; +} + +static int +nfp_flower_loop_action(const struct tc_action *a, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev) +{ + struct nfp_fl_push_vlan *psh_v; + struct nfp_fl_pop_vlan *pop_v; + struct nfp_fl_output *output; + int err; + + if (is_tcf_gact_shot(a)) { + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP); + } else if (is_tcf_mirred_egress_redirect(a)) { + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_output(output, a, nfp_fl, true, netdev); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_output); + } else if (is_tcf_mirred_egress_mirror(a)) { + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_output(output, a, nfp_fl, false, netdev); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_output); + } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV); + + nfp_fl_pop_vlan(pop_v); + *a_len += sizeof(struct nfp_fl_pop_vlan); + } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { + if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len]; + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_fl_push_vlan(psh_v, a); + *a_len += sizeof(struct nfp_fl_push_vlan); + } else { + /* Currently we do not handle any other actions. */ + return -EOPNOTSUPP; + } + + return 0; +} + +int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow) +{ + int act_len, act_cnt, err; + const struct tc_action *a; + LIST_HEAD(actions); + + memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); + nfp_flow->meta.act_len = 0; + act_len = 0; + act_cnt = 0; + + tcf_exts_to_list(flow->exts, &actions); + list_for_each_entry(a, &actions, list) { + err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev); + if (err) + return err; + act_cnt++; + } + + /* We optimise when the action list is small, this can unfortunately + * not happen once we have more than one action in the action list. + */ + if (act_cnt > 1) + nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + nfp_flow->meta.act_len = act_len; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 7761be436726..dd7fa9cf225f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -36,6 +36,7 @@ #include <linux/skbuff.h> #include <net/dst_metadata.h> +#include "main.h" #include "../nfpcore/nfp_cpp.h" #include "../nfp_net_repr.h" #include "./cmsg.h" @@ -52,12 +53,7 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb) return (struct nfp_flower_cmsg_hdr *)skb->data; } -static void *nfp_flower_cmsg_get_data(struct sk_buff *skb) -{ - return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; -} - -static struct sk_buff * +struct sk_buff * nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, enum nfp_flower_cmsg_type_port type) { @@ -79,9 +75,8 @@ nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, return skb; } -int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok) +int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok) { - struct nfp_repr *repr = netdev_priv(netdev); struct nfp_flower_cmsg_portmod *msg; struct sk_buff *skb; @@ -94,7 +89,7 @@ int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok) msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id); msg->reserved = 0; msg->info = carrier_ok; - msg->mtu = cpu_to_be16(netdev->mtu); + msg->mtu = cpu_to_be16(repr->netdev->mtu); nfp_ctrl_tx(repr->app->ctrl, skb); @@ -149,6 +144,9 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_PORT_MOD: nfp_flower_cmsg_portmod_rx(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_FLOW_STATS: + nfp_flower_rx_flow_stats(app, skb); + break; default: nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", type); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 2eeddada7f4d..cf738de170ab 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -40,6 +40,196 @@ #include "../nfp_app.h" +#define NFP_FLOWER_LAYER_META BIT(0) +#define NFP_FLOWER_LAYER_PORT BIT(1) +#define NFP_FLOWER_LAYER_MAC BIT(2) +#define NFP_FLOWER_LAYER_TP BIT(3) +#define NFP_FLOWER_LAYER_IPV4 BIT(4) +#define NFP_FLOWER_LAYER_IPV6 BIT(5) +#define NFP_FLOWER_LAYER_CT BIT(6) +#define NFP_FLOWER_LAYER_VXLAN BIT(7) + +#define NFP_FLOWER_LAYER_ETHER BIT(3) +#define NFP_FLOWER_LAYER_ARP BIT(4) + +#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) +#define NFP_FLOWER_MASK_VLAN_CFI BIT(12) +#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) + +#define NFP_FL_SC_ACT_DROP 0x80000000 +#define NFP_FL_SC_ACT_USER 0x7D000000 +#define NFP_FL_SC_ACT_POPV 0x6A000000 +#define NFP_FL_SC_ACT_NULL 0x00000000 + +/* The maximum action list size (in bytes) supported by the NFP. + */ +#define NFP_FL_MAX_A_SIZ 1216 +#define NFP_FL_LW_SIZ 2 + +/* Action opcodes */ +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_NUM 32 + +#define NFP_FL_ACT_JMP_ID GENMASK(15, 8) +#define NFP_FL_ACT_LEN_LW GENMASK(7, 0) + +#define NFP_FL_OUT_FLAGS_LAST BIT(15) +#define NFP_FL_OUT_FLAGS_USE_TUN BIT(4) +#define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) + +#define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) +#define NFP_FL_PUSH_VLAN_CFI BIT(12) +#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) + +struct nfp_fl_output { + __be16 a_op; + __be16 flags; + __be32 port; +}; + +struct nfp_fl_push_vlan { + __be16 a_op; + __be16 reserved; + __be16 vlan_tpid; + __be16 vlan_tci; +}; + +struct nfp_fl_pop_vlan { + __be16 a_op; + __be16 reserved; +}; + +/* Metadata without L2 (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | key_layers | mask_id | reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_meta_one { + u8 nfp_flow_key_layer; + u8 mask_id; + u16 reserved; +}; + +/* Metadata with L2 (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | key_type | mask_id | PCP |p| vlan outermost VID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * ^ ^ + * NOTE: | TCI | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_meta_two { + u8 nfp_flow_key_layer; + u8 mask_id; + __be16 tci; +}; + +/* Port details (1W/4B) + * ---------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | port_ingress | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_in_port { + __be32 in_port; +}; + +/* L2 details (4W/16B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_dst, 47 - 32 | mac_addr_src, 15 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mac_addr_src, 47 - 16 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mpls outermost label | TC |B| reserved |q| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_mac_mpls { + u8 mac_dst[6]; + u8 mac_src[6]; + __be32 mpls_lse; +}; + +/* L4 ports (for UDP, TCP, SCTP) (1W/4B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | port_src | port_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_tp_ports { + __be16 port_src; + __be16 port_dst; +}; + +/* L3 IPv4 details (3W/12B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DSCP |ECN| protocol | reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv4 { + u8 tos; + u8 proto; + u8 ttl; + u8 reserved; + __be32 ipv4_src; + __be32 ipv4_dst; +}; + +/* L3 IPv6 details (10W/40B) + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | DSCP |ECN| protocol | reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_exthdr | res | ipv6_flow_label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6 { + u8 tos; + u8 proto; + u8 ttl; + u8 reserved; + __be32 ipv6_flow_label_exthdr; + struct in6_addr ipv6_src; + struct in6_addr ipv6_dst; +}; + /* The base header for a control message packet. * Defines an 8-bit version, and an 8-bit type, padded * to a 32-bit word. Rest of the packet is type-specific. @@ -55,7 +245,10 @@ struct nfp_flower_cmsg_hdr { /* Types defined for port related control messages */ enum nfp_flower_cmsg_type_port { + NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, NFP_FLOWER_CMSG_TYPE_MAX = 32, }; @@ -110,7 +303,15 @@ nfp_flower_cmsg_pcie_port(u8 nfp_pcie, enum nfp_flower_cmsg_port_vnic_type type, NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT); } -int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok); +static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb) +{ + return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; +} + +int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok); void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb); +struct sk_buff * +nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, + enum nfp_flower_cmsg_type_port type); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 8e5ca6b4bb33..5fe6d3582597 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -34,10 +34,13 @@ #include <linux/etherdevice.h> #include <linux/pci.h> #include <linux/skbuff.h> +#include <linux/vmalloc.h> #include <net/devlink.h> #include <net/dst_metadata.h> +#include "main.h" #include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nffw.h" #include "../nfpcore/nfp_nsp.h" #include "../nfp_app.h" #include "../nfp_main.h" @@ -46,13 +49,7 @@ #include "../nfp_port.h" #include "./cmsg.h" -/** - * struct nfp_flower_priv - Flower APP per-vNIC priv data - * @nn: Pointer to vNIC - */ -struct nfp_flower_priv { - struct nfp_net *nn; -}; +#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn) { @@ -104,51 +101,30 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id) return reprs->reprs[port]; } -static void -nfp_flower_repr_netdev_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) -{ - struct nfp_repr *repr = netdev_priv(netdev); - enum nfp_repr_type type; - u32 port_id; - u8 port = 0; - - port_id = repr->dst->u.port_info.port_id; - type = nfp_flower_repr_get_type_and_port(repr->app, port_id, &port); - nfp_repr_get_stats64(repr->app, type, port, stats); -} - -static int nfp_flower_repr_netdev_open(struct net_device *netdev) +static int +nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr) { int err; - err = nfp_flower_cmsg_portmod(netdev, true); + err = nfp_flower_cmsg_portmod(repr, true); if (err) return err; - netif_carrier_on(netdev); - netif_tx_wake_all_queues(netdev); + netif_carrier_on(repr->netdev); + netif_tx_wake_all_queues(repr->netdev); return 0; } -static int nfp_flower_repr_netdev_stop(struct net_device *netdev) +static int +nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr) { - netif_carrier_off(netdev); - netif_tx_disable(netdev); + netif_carrier_off(repr->netdev); + netif_tx_disable(repr->netdev); - return nfp_flower_cmsg_portmod(netdev, false); + return nfp_flower_cmsg_portmod(repr, false); } -static const struct net_device_ops nfp_flower_repr_netdev_ops = { - .ndo_open = nfp_flower_repr_netdev_open, - .ndo_stop = nfp_flower_repr_netdev_stop, - .ndo_start_xmit = nfp_repr_xmit, - .ndo_get_stats64 = nfp_flower_repr_netdev_get_stats64, - .ndo_has_offload_stats = nfp_repr_has_offload_stats, - .ndo_get_offload_stats = nfp_repr_get_offload_stats, -}; - static void nfp_flower_sriov_disable(struct nfp_app *app) { nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF); @@ -162,14 +138,19 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); struct nfp_flower_priv *priv = app->priv; struct nfp_reprs *reprs, *old_reprs; + enum nfp_port_type port_type; const u8 queue = 0; int i, err; + port_type = repr_type == NFP_REPR_TYPE_PF ? NFP_PORT_PF_PORT : + NFP_PORT_VF_PORT; + reprs = nfp_reprs_alloc(cnt); if (!reprs) return -ENOMEM; for (i = 0; i < cnt; i++) { + struct nfp_port *port; u32 port_id; reprs->reprs[i] = nfp_repr_alloc(app); @@ -178,15 +159,24 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, goto err_reprs_clean; } + port = nfp_port_alloc(app, port_type, reprs->reprs[i]); + if (repr_type == NFP_REPR_TYPE_PF) { + port->pf_id = i; + } else { + port->pf_id = 0; /* For now we only support 1 PF */ + port->vf_id = i; + } + eth_hw_addr_random(reprs->reprs[i]); port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type, i, queue); err = nfp_repr_init(app, reprs->reprs[i], - &nfp_flower_repr_netdev_ops, - port_id, NULL, priv->nn->dp.netdev); - if (err) + port_id, port, priv->nn->dp.netdev); + if (err) { + nfp_port_free(port); goto err_reprs_clean; + } nfp_info(app->cpp, "%s%d Representor(%s) created\n", repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i, @@ -260,7 +250,6 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port); err = nfp_repr_init(app, reprs->reprs[phys_port], - &nfp_flower_repr_netdev_ops, cmsg_port_id, port, priv->nn->dp.netdev); if (err) { nfp_port_free(port); @@ -296,26 +285,16 @@ static int nfp_flower_start(struct nfp_app *app) NFP_REPR_TYPE_PF, 1); } -static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn) -{ - kfree(app->priv); - app->priv = NULL; -} - static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn, unsigned int id) { - struct nfp_flower_priv *priv; + struct nfp_flower_priv *priv = app->priv; if (id > 0) { nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n"); goto err_invalid_port; } - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - app->priv = priv; priv->nn = nn; eth_hw_addr_random(nn->dp.netdev); @@ -331,6 +310,8 @@ err_invalid_port: static int nfp_flower_init(struct nfp_app *app) { const struct nfp_pf *pf = app->pf; + u64 version; + int err; if (!pf->eth_tbl) { nfp_warn(app->cpp, "FlowerNIC requires eth table\n"); @@ -347,7 +328,37 @@ static int nfp_flower_init(struct nfp_app *app) return -EINVAL; } + version = nfp_rtsym_read_le(app->pf->rtbl, "hw_flower_version", &err); + if (err) { + nfp_warn(app->cpp, "FlowerNIC requires hw_flower_version memory symbol\n"); + return err; + } + + /* We need to ensure hardware has enough flower capabilities. */ + if (version != NFP_FLOWER_ALLOWED_VER) { + nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n"); + return -EINVAL; + } + + app->priv = vzalloc(sizeof(struct nfp_flower_priv)); + if (!app->priv) + return -ENOMEM; + + err = nfp_flower_metadata_init(app); + if (err) + goto err_free_app_priv; + return 0; + +err_free_app_priv: + vfree(app->priv); + return err; +} + +static void nfp_flower_clean(struct nfp_app *app) +{ + vfree(app->priv); + app->priv = NULL; } const struct nfp_app_type app_flower = { @@ -358,9 +369,12 @@ const struct nfp_app_type app_flower = { .extra_cap = nfp_flower_extra_cap, .init = nfp_flower_init, + .clean = nfp_flower_clean, .vnic_init = nfp_flower_vnic_init, - .vnic_clean = nfp_flower_vnic_clean, + + .repr_open = nfp_flower_repr_netdev_open, + .repr_stop = nfp_flower_repr_netdev_stop, .start = nfp_flower_start, .stop = nfp_flower_stop, @@ -372,4 +386,6 @@ const struct nfp_app_type app_flower = { .eswitch_mode_get = eswitch_mode_get, .repr_get = nfp_flower_repr_get, + + .setup_tc = nfp_flower_setup_tc, }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h new file mode 100644 index 000000000000..9e64c048e83f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_FLOWER_H__ +#define __NFP_FLOWER_H__ 1 + +#include <linux/circ_buf.h> +#include <linux/hashtable.h> +#include <linux/time64.h> +#include <linux/types.h> + +struct tc_to_netdev; +struct net_device; +struct nfp_app; + +#define NFP_FL_STATS_ENTRY_RS BIT(20) +#define NFP_FL_STATS_ELEM_RS 4 +#define NFP_FL_REPEATED_HASH_MAX BIT(17) +#define NFP_FLOWER_HASH_BITS 19 +#define NFP_FLOWER_MASK_ENTRY_RS 256 +#define NFP_FLOWER_MASK_ELEMENT_RS 1 +#define NFP_FLOWER_MASK_HASH_BITS 10 + +#define NFP_FL_META_FLAG_NEW_MASK 128 +#define NFP_FL_META_FLAG_LAST_MASK 1 + +#define NFP_FL_MASK_REUSE_TIME_NS 40000 +#define NFP_FL_MASK_ID_LOCATION 1 + +struct nfp_fl_mask_id { + struct circ_buf mask_id_free_list; + struct timespec64 *last_used; + u8 init_unallocated; +}; + +struct nfp_fl_stats_id { + struct circ_buf free_list; + u32 init_unalloc; + u8 repeated_em_count; +}; + +/** + * struct nfp_flower_priv - Flower APP per-vNIC priv data + * @nn: Pointer to vNIC + * @mask_id_seed: Seed used for mask hash table + * @flower_version: HW version of flower + * @stats_ids: List of free stats ids + * @mask_ids: List of free mask ids + * @mask_table: Hash table used to store masks + * @flow_table: Hash table used to store flower rules + */ +struct nfp_flower_priv { + struct nfp_net *nn; + u32 mask_id_seed; + u64 flower_version; + struct nfp_fl_stats_id stats_ids; + struct nfp_fl_mask_id mask_ids; + DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS); + DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS); +}; + +struct nfp_fl_key_ls { + u32 key_layer_two; + u8 key_layer; + int key_size; +}; + +struct nfp_fl_rule_metadata { + u8 key_len; + u8 mask_len; + u8 act_len; + u8 flags; + __be32 host_ctx_id; + __be64 host_cookie __packed; + __be64 flow_version __packed; + __be32 shortcut; +}; + +struct nfp_fl_stats { + u64 pkts; + u64 bytes; + u64 used; +}; + +struct nfp_fl_payload { + struct nfp_fl_rule_metadata meta; + unsigned long tc_flower_cookie; + struct hlist_node link; + struct rcu_head rcu; + spinlock_t lock; /* lock stats */ + struct nfp_fl_stats stats; + char *unmasked_data; + char *mask_data; + char *action_data; +}; + +struct nfp_fl_stats_frame { + __be32 stats_con_id; + __be32 pkt_count; + __be64 byte_count; + __be64 stats_cookie; +}; + +int nfp_flower_metadata_init(struct nfp_app *app); +void nfp_flower_metadata_cleanup(struct nfp_app *app); + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + u32 handle, __be16 proto, struct tc_to_netdev *tc); +int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, + struct nfp_fl_key_ls *key_ls, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow); +int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow); +int nfp_compile_flow_metadata(struct nfp_app *app, + struct tc_cls_flower_offload *flow, + struct nfp_fl_payload *nfp_flow); +int nfp_modify_flow_metadata(struct nfp_app *app, + struct nfp_fl_payload *nfp_flow); + +struct nfp_fl_payload * +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); +struct nfp_fl_payload * +nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); + +void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c new file mode 100644 index 000000000000..0e08404480ef --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -0,0 +1,292 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bitfield.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" + +static void +nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame, + struct tc_cls_flower_offload *flow, u8 key_type, + bool mask_version) +{ + struct flow_dissector_key_vlan *flow_vlan; + u16 tmp_tci; + + /* Populate the metadata frame. */ + frame->nfp_flow_key_layer = key_type; + frame->mask_id = ~0; + + if (mask_version) { + frame->tci = cpu_to_be16(~0); + return; + } + + flow_vlan = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_VLAN, + flow->key); + + /* Populate the tci field. */ + if (!flow_vlan->vlan_id) { + tmp_tci = 0; + } else { + tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + flow_vlan->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + flow_vlan->vlan_id) | + NFP_FLOWER_MASK_VLAN_CFI; + } + frame->tci = cpu_to_be16(tmp_tci); +} + +static void +nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type) +{ + frame->nfp_flow_key_layer = key_type; + frame->mask_id = 0; + frame->reserved = 0; +} + +static int +nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, + bool mask_version) +{ + if (mask_version) { + frame->in_port = cpu_to_be32(~0); + return 0; + } + + frame->in_port = cpu_to_be32(cmsg_port); + + return 0; +} + +static void +nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame, + struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_eth_addrs *flow_mac; + + flow_mac = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + target); + + memset(frame, 0, sizeof(struct nfp_flower_mac_mpls)); + + /* Populate mac frame. */ + ether_addr_copy(frame->mac_dst, &flow_mac->dst[0]); + ether_addr_copy(frame->mac_src, &flow_mac->src[0]); + + if (mask_version) + frame->mpls_lse = cpu_to_be32(~0); +} + +static void +nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame, + struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_ports *flow_tp; + + flow_tp = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_PORTS, + target); + + frame->port_src = flow_tp->src; + frame->port_dst = flow_tp->dst; +} + +static void +nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, + struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_ipv4_addrs *flow_ipv4; + struct flow_dissector_key_basic *flow_basic; + + flow_ipv4 = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + target); + + flow_basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + target); + + /* Populate IPv4 frame. */ + frame->reserved = 0; + frame->ipv4_src = flow_ipv4->src; + frame->ipv4_dst = flow_ipv4->dst; + frame->proto = flow_basic->ip_proto; + /* Wildcard TOS/TTL for now. */ + frame->tos = 0; + frame->ttl = 0; +} + +static void +nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, + struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_ipv6_addrs *flow_ipv6; + struct flow_dissector_key_basic *flow_basic; + + flow_ipv6 = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + target); + + flow_basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + target); + + /* Populate IPv6 frame. */ + frame->reserved = 0; + frame->ipv6_src = flow_ipv6->src; + frame->ipv6_dst = flow_ipv6->dst; + frame->proto = flow_basic->ip_proto; + /* Wildcard LABEL/TOS/TTL for now. */ + frame->ipv6_flow_label_exthdr = 0; + frame->tos = 0; + frame->ttl = 0; +} + +int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, + struct nfp_fl_key_ls *key_ls, + struct net_device *netdev, + struct nfp_fl_payload *nfp_flow) +{ + int err; + u8 *ext; + u8 *msk; + + memset(nfp_flow->unmasked_data, 0, key_ls->key_size); + memset(nfp_flow->mask_data, 0, key_ls->key_size); + + ext = nfp_flow->unmasked_data; + msk = nfp_flow->mask_data; + if (NFP_FLOWER_LAYER_PORT & key_ls->key_layer) { + /* Populate Exact Metadata. */ + nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)ext, + flow, key_ls->key_layer, false); + /* Populate Mask Metadata. */ + nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)msk, + flow, key_ls->key_layer, true); + ext += sizeof(struct nfp_flower_meta_two); + msk += sizeof(struct nfp_flower_meta_two); + + /* Populate Exact Port data. */ + err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, + nfp_repr_get_port_id(netdev), + false); + if (err) + return err; + + /* Populate Mask Port Data. */ + err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, + nfp_repr_get_port_id(netdev), + true); + if (err) + return err; + + ext += sizeof(struct nfp_flower_in_port); + msk += sizeof(struct nfp_flower_in_port); + } else { + /* Populate Exact Metadata. */ + nfp_flower_compile_meta((struct nfp_flower_meta_one *)ext, + key_ls->key_layer); + /* Populate Mask Metadata. */ + nfp_flower_compile_meta((struct nfp_flower_meta_one *)msk, + key_ls->key_layer); + ext += sizeof(struct nfp_flower_meta_one); + msk += sizeof(struct nfp_flower_meta_one); + } + + if (NFP_FLOWER_LAYER_META & key_ls->key_layer) { + /* Additional Metadata Fields. + * Currently unsupported. + */ + return -EOPNOTSUPP; + } + + if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { + /* Populate Exact MAC Data. */ + nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, + flow, false); + /* Populate Mask MAC Data. */ + nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk, + flow, true); + ext += sizeof(struct nfp_flower_mac_mpls); + msk += sizeof(struct nfp_flower_mac_mpls); + } + + if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { + /* Populate Exact TP Data. */ + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, + flow, false); + /* Populate Mask TP Data. */ + nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk, + flow, true); + ext += sizeof(struct nfp_flower_tp_ports); + msk += sizeof(struct nfp_flower_tp_ports); + } + + if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { + /* Populate Exact IPv4 Data. */ + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, + flow, false); + /* Populate Mask IPv4 Data. */ + nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk, + flow, true); + ext += sizeof(struct nfp_flower_ipv4); + msk += sizeof(struct nfp_flower_ipv4); + } + + if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { + /* Populate Exact IPv4 Data. */ + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, + flow, false); + /* Populate Mask IPv4 Data. */ + nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk, + flow, true); + ext += sizeof(struct nfp_flower_ipv6); + msk += sizeof(struct nfp_flower_ipv6); + } + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c new file mode 100644 index 000000000000..fec0ff2ca94f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -0,0 +1,438 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> +#include <linux/vmalloc.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_app.h" + +struct nfp_mask_id_table { + struct hlist_node link; + u32 hash_key; + u32 ref_cnt; + u8 mask_id; +}; + +static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct circ_buf *ring; + + ring = &priv->stats_ids.free_list; + /* Check if buffer is full. */ + if (!CIRC_SPACE(ring->head, ring->tail, NFP_FL_STATS_ENTRY_RS * + NFP_FL_STATS_ELEM_RS - + NFP_FL_STATS_ELEM_RS + 1)) + return -ENOBUFS; + + memcpy(&ring->buf[ring->head], &stats_context_id, NFP_FL_STATS_ELEM_RS); + ring->head = (ring->head + NFP_FL_STATS_ELEM_RS) % + (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS); + + return 0; +} + +static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) +{ + struct nfp_flower_priv *priv = app->priv; + u32 freed_stats_id, temp_stats_id; + struct circ_buf *ring; + + ring = &priv->stats_ids.free_list; + freed_stats_id = NFP_FL_STATS_ENTRY_RS; + /* Check for unallocated entries first. */ + if (priv->stats_ids.init_unalloc > 0) { + *stats_context_id = priv->stats_ids.init_unalloc - 1; + priv->stats_ids.init_unalloc--; + return 0; + } + + /* Check if buffer is empty. */ + if (ring->head == ring->tail) { + *stats_context_id = freed_stats_id; + return -ENOENT; + } + + memcpy(&temp_stats_id, &ring->buf[ring->tail], NFP_FL_STATS_ELEM_RS); + *stats_context_id = temp_stats_id; + memcpy(&ring->buf[ring->tail], &freed_stats_id, NFP_FL_STATS_ELEM_RS); + ring->tail = (ring->tail + NFP_FL_STATS_ELEM_RS) % + (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS); + + return 0; +} + +/* Must be called with either RTNL or rcu_read_lock */ +struct nfp_fl_payload * +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *flower_entry; + + hash_for_each_possible_rcu(priv->flow_table, flower_entry, link, + tc_flower_cookie) + if (flower_entry->tc_flower_cookie == tc_flower_cookie) + return flower_entry; + + return NULL; +} + +static void +nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats) +{ + struct nfp_fl_payload *nfp_flow; + unsigned long flower_cookie; + + flower_cookie = be64_to_cpu(stats->stats_cookie); + + rcu_read_lock(); + nfp_flow = nfp_flower_search_fl_table(app, flower_cookie); + if (!nfp_flow) + goto exit_rcu_unlock; + + if (nfp_flow->meta.host_ctx_id != stats->stats_con_id) + goto exit_rcu_unlock; + + spin_lock(&nfp_flow->lock); + nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count); + nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count); + nfp_flow->stats.used = jiffies; + spin_unlock(&nfp_flow->lock); + +exit_rcu_unlock: + rcu_read_unlock(); +} + +void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb) +{ + unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN; + struct nfp_fl_stats_frame *stats_frame; + unsigned char *msg; + int i; + + msg = nfp_flower_cmsg_get_data(skb); + + stats_frame = (struct nfp_fl_stats_frame *)msg; + for (i = 0; i < msg_len / sizeof(*stats_frame); i++) + nfp_flower_update_stats(app, stats_frame + i); +} + +static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct circ_buf *ring; + struct timespec64 now; + + ring = &priv->mask_ids.mask_id_free_list; + /* Checking if buffer is full. */ + if (CIRC_SPACE(ring->head, ring->tail, NFP_FLOWER_MASK_ENTRY_RS) == 0) + return -ENOBUFS; + + memcpy(&ring->buf[ring->head], &mask_id, NFP_FLOWER_MASK_ELEMENT_RS); + ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) % + (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS); + + getnstimeofday64(&now); + priv->mask_ids.last_used[mask_id] = now; + + return 0; +} + +static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) +{ + struct nfp_flower_priv *priv = app->priv; + struct timespec64 delta, now; + struct circ_buf *ring; + u8 temp_id, freed_id; + + ring = &priv->mask_ids.mask_id_free_list; + freed_id = NFP_FLOWER_MASK_ENTRY_RS - 1; + /* Checking for unallocated entries first. */ + if (priv->mask_ids.init_unallocated > 0) { + *mask_id = priv->mask_ids.init_unallocated; + priv->mask_ids.init_unallocated--; + return 0; + } + + /* Checking if buffer is empty. */ + if (ring->head == ring->tail) + goto err_not_found; + + memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS); + *mask_id = temp_id; + + getnstimeofday64(&now); + delta = timespec64_sub(now, priv->mask_ids.last_used[*mask_id]); + + if (timespec64_to_ns(&delta) < NFP_FL_MASK_REUSE_TIME_NS) + goto err_not_found; + + memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS); + ring->tail = (ring->tail + NFP_FLOWER_MASK_ELEMENT_RS) % + (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS); + + return 0; + +err_not_found: + *mask_id = freed_id; + return -ENOENT; +} + +static int +nfp_add_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_mask_id_table *mask_entry; + unsigned long hash_key; + u8 mask_id; + + if (nfp_mask_alloc(app, &mask_id)) + return -ENOENT; + + mask_entry = kmalloc(sizeof(*mask_entry), GFP_KERNEL); + if (!mask_entry) { + nfp_release_mask_id(app, mask_id); + return -ENOMEM; + } + + INIT_HLIST_NODE(&mask_entry->link); + mask_entry->mask_id = mask_id; + hash_key = jhash(mask_data, mask_len, priv->mask_id_seed); + mask_entry->hash_key = hash_key; + mask_entry->ref_cnt = 1; + hash_add(priv->mask_table, &mask_entry->link, hash_key); + + return mask_id; +} + +static struct nfp_mask_id_table * +nfp_search_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_mask_id_table *mask_entry; + unsigned long hash_key; + + hash_key = jhash(mask_data, mask_len, priv->mask_id_seed); + + hash_for_each_possible(priv->mask_table, mask_entry, link, hash_key) + if (mask_entry->hash_key == hash_key) + return mask_entry; + + return NULL; +} + +static int +nfp_find_in_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len) +{ + struct nfp_mask_id_table *mask_entry; + + mask_entry = nfp_search_mask_table(app, mask_data, mask_len); + if (!mask_entry) + return -ENOENT; + + mask_entry->ref_cnt++; + + /* Casting u8 to int for later use. */ + return mask_entry->mask_id; +} + +static bool +nfp_check_mask_add(struct nfp_app *app, char *mask_data, u32 mask_len, + u8 *meta_flags, u8 *mask_id) +{ + int id; + + id = nfp_find_in_mask_table(app, mask_data, mask_len); + if (id < 0) { + id = nfp_add_mask_table(app, mask_data, mask_len); + if (id < 0) + return false; + *meta_flags |= NFP_FL_META_FLAG_NEW_MASK; + } + *mask_id = id; + + return true; +} + +static bool +nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, + u8 *meta_flags, u8 *mask_id) +{ + struct nfp_mask_id_table *mask_entry; + + mask_entry = nfp_search_mask_table(app, mask_data, mask_len); + if (!mask_entry) + return false; + + *mask_id = mask_entry->mask_id; + mask_entry->ref_cnt--; + if (!mask_entry->ref_cnt) { + hash_del(&mask_entry->link); + nfp_release_mask_id(app, *mask_id); + kfree(mask_entry); + if (meta_flags) + *meta_flags |= NFP_FL_META_FLAG_LAST_MASK; + } + + return true; +} + +int nfp_compile_flow_metadata(struct nfp_app *app, + struct tc_cls_flower_offload *flow, + struct nfp_fl_payload *nfp_flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *check_entry; + u8 new_mask_id; + u32 stats_cxt; + + if (nfp_get_stats_entry(app, &stats_cxt)) + return -ENOENT; + + nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); + nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie); + + new_mask_id = 0; + if (!nfp_check_mask_add(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + &nfp_flow->meta.flags, &new_mask_id)) { + if (nfp_release_stats_entry(app, stats_cxt)) + return -EINVAL; + return -ENOENT; + } + + nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version); + priv->flower_version++; + + /* Update flow payload with mask ids. */ + nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; + nfp_flow->stats.pkts = 0; + nfp_flow->stats.bytes = 0; + nfp_flow->stats.used = jiffies; + + check_entry = nfp_flower_search_fl_table(app, flow->cookie); + if (check_entry) { + if (nfp_release_stats_entry(app, stats_cxt)) + return -EINVAL; + + if (!nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id)) + return -EINVAL; + + return -EEXIST; + } + + return 0; +} + +int nfp_modify_flow_metadata(struct nfp_app *app, + struct nfp_fl_payload *nfp_flow) +{ + struct nfp_flower_priv *priv = app->priv; + u8 new_mask_id = 0; + u32 temp_ctx_id; + + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, &nfp_flow->meta.flags, + &new_mask_id); + + nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version); + priv->flower_version++; + + /* Update flow payload with mask ids. */ + nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; + + /* Release the stats ctx id. */ + temp_ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); + + return nfp_release_stats_entry(app, temp_ctx_id); +} + +int nfp_flower_metadata_init(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + hash_init(priv->mask_table); + hash_init(priv->flow_table); + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); + + /* Init ring buffer and unallocated mask_ids. */ + priv->mask_ids.mask_id_free_list.buf = + kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, + NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); + if (!priv->mask_ids.mask_id_free_list.buf) + return -ENOMEM; + + priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; + + /* Init timestamps for mask id*/ + priv->mask_ids.last_used = + kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, + sizeof(*priv->mask_ids.last_used), GFP_KERNEL); + if (!priv->mask_ids.last_used) + goto err_free_mask_id; + + /* Init ring buffer and unallocated stats_ids. */ + priv->stats_ids.free_list.buf = + vmalloc(NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS); + if (!priv->stats_ids.free_list.buf) + goto err_free_last_used; + + priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX; + + return 0; + +err_free_last_used: + kfree(priv->stats_ids.free_list.buf); +err_free_mask_id: + kfree(priv->mask_ids.mask_id_free_list.buf); + return -ENOMEM; +} + +void nfp_flower_metadata_cleanup(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + if (!priv) + return; + + kfree(priv->mask_ids.mask_id_free_list.buf); + kfree(priv->mask_ids.last_used); + vfree(priv->stats_ids.free_list.buf); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c new file mode 100644 index 000000000000..4ad10bd5e139 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> +#include <net/devlink.h> +#include <net/pkt_cls.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_port.h" + +static int +nfp_flower_xmit_flow(struct net_device *netdev, + struct nfp_fl_payload *nfp_flow, u8 mtype) +{ + u32 meta_len, key_len, mask_len, act_len, tot_len; + struct nfp_repr *priv = netdev_priv(netdev); + struct sk_buff *skb; + unsigned char *msg; + + meta_len = sizeof(struct nfp_fl_rule_metadata); + key_len = nfp_flow->meta.key_len; + mask_len = nfp_flow->meta.mask_len; + act_len = nfp_flow->meta.act_len; + + tot_len = meta_len + key_len + mask_len + act_len; + + /* Convert to long words as firmware expects + * lengths in units of NFP_FL_LW_SIZ. + */ + nfp_flow->meta.key_len >>= NFP_FL_LW_SIZ; + nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ; + nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ; + + skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, &nfp_flow->meta, meta_len); + memcpy(&msg[meta_len], nfp_flow->unmasked_data, key_len); + memcpy(&msg[meta_len + key_len], nfp_flow->mask_data, mask_len); + memcpy(&msg[meta_len + key_len + mask_len], + nfp_flow->action_data, act_len); + + /* Convert back to bytes as software expects + * lengths in units of bytes. + */ + nfp_flow->meta.key_len <<= NFP_FL_LW_SIZ; + nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ; + nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ; + + nfp_ctrl_tx(priv->app->ctrl, skb); + + return 0; +} + +static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) +{ + return dissector_uses_key(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS) || + dissector_uses_key(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS) || + dissector_uses_key(f->dissector, + FLOW_DISSECTOR_KEY_PORTS) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP); +} + +static int +nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, + struct tc_cls_flower_offload *flow) +{ + struct flow_dissector_key_control *mask_enc_ctl; + struct flow_dissector_key_basic *mask_basic; + struct flow_dissector_key_basic *key_basic; + u32 key_layer_two; + u8 key_layer; + int key_size; + + mask_enc_ctl = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + flow->mask); + + mask_basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + flow->mask); + + key_basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + flow->key); + key_layer_two = 0; + key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; + key_size = sizeof(struct nfp_flower_meta_one) + + sizeof(struct nfp_flower_in_port) + + sizeof(struct nfp_flower_mac_mpls); + + /* We are expecting a tunnel. For now we ignore offloading. */ + if (mask_enc_ctl->addr_type) + return -EOPNOTSUPP; + + if (mask_basic->n_proto) { + /* Ethernet type is present in the key. */ + switch (key_basic->n_proto) { + case cpu_to_be16(ETH_P_IP): + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + break; + + case cpu_to_be16(ETH_P_IPV6): + key_layer |= NFP_FLOWER_LAYER_IPV6; + key_size += sizeof(struct nfp_flower_ipv6); + break; + + /* Currently we do not offload ARP + * because we rely on it to get to the host. + */ + case cpu_to_be16(ETH_P_ARP): + return -EOPNOTSUPP; + + /* Will be included in layer 2. */ + case cpu_to_be16(ETH_P_8021Q): + break; + + default: + /* Other ethtype - we need check the masks for the + * remainder of the key to ensure we can offload. + */ + if (nfp_flower_check_higher_than_mac(flow)) + return -EOPNOTSUPP; + break; + } + } + + if (mask_basic->ip_proto) { + /* Ethernet type is present in the key. */ + switch (key_basic->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + key_layer |= NFP_FLOWER_LAYER_TP; + key_size += sizeof(struct nfp_flower_tp_ports); + break; + default: + /* Other ip proto - we need check the masks for the + * remainder of the key to ensure we can offload. + */ + return -EOPNOTSUPP; + } + } + + ret_key_ls->key_layer = key_layer; + ret_key_ls->key_layer_two = key_layer_two; + ret_key_ls->key_size = key_size; + + return 0; +} + +static struct nfp_fl_payload * +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) +{ + struct nfp_fl_payload *flow_pay; + + flow_pay = kmalloc(sizeof(*flow_pay), GFP_KERNEL); + if (!flow_pay) + return NULL; + + flow_pay->meta.key_len = key_layer->key_size; + flow_pay->unmasked_data = kmalloc(key_layer->key_size, GFP_KERNEL); + if (!flow_pay->unmasked_data) + goto err_free_flow; + + flow_pay->meta.mask_len = key_layer->key_size; + flow_pay->mask_data = kmalloc(key_layer->key_size, GFP_KERNEL); + if (!flow_pay->mask_data) + goto err_free_unmasked; + + flow_pay->action_data = kmalloc(NFP_FL_MAX_A_SIZ, GFP_KERNEL); + if (!flow_pay->action_data) + goto err_free_mask; + + flow_pay->meta.flags = 0; + spin_lock_init(&flow_pay->lock); + + return flow_pay; + +err_free_mask: + kfree(flow_pay->mask_data); +err_free_unmasked: + kfree(flow_pay->unmasked_data); +err_free_flow: + kfree(flow_pay); + return NULL; +} + +/** + * nfp_flower_add_offload() - Adds a new flow to hardware. + * @app: Pointer to the APP handle + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure. + * + * Adds a new flow to the repeated hash structure and action payload. + * + * Return: negative value on error, 0 if configured successfully. + */ +static int +nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_flower_offload *flow) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_payload *flow_pay; + struct nfp_fl_key_ls *key_layer; + int err; + + key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL); + if (!key_layer) + return -ENOMEM; + + err = nfp_flower_calculate_key_layers(key_layer, flow); + if (err) + goto err_free_key_ls; + + flow_pay = nfp_flower_allocate_new(key_layer); + if (!flow_pay) { + err = -ENOMEM; + goto err_free_key_ls; + } + + err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay); + if (err) + goto err_destroy_flow; + + err = nfp_flower_compile_action(flow, netdev, flow_pay); + if (err) + goto err_destroy_flow; + + err = nfp_compile_flow_metadata(app, flow, flow_pay); + if (err) + goto err_destroy_flow; + + err = nfp_flower_xmit_flow(netdev, flow_pay, + NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + if (err) + goto err_destroy_flow; + + INIT_HLIST_NODE(&flow_pay->link); + flow_pay->tc_flower_cookie = flow->cookie; + hash_add_rcu(priv->flow_table, &flow_pay->link, flow->cookie); + + /* Deallocate flow payload when flower rule has been destroyed. */ + kfree(key_layer); + + return 0; + +err_destroy_flow: + kfree(flow_pay->action_data); + kfree(flow_pay->mask_data); + kfree(flow_pay->unmasked_data); + kfree(flow_pay); +err_free_key_ls: + kfree(key_layer); + return err; +} + +/** + * nfp_flower_del_offload() - Removes a flow from hardware. + * @app: Pointer to the APP handle + * @netdev: netdev structure. + * @flow: TC flower classifier offload structure + * + * Removes a flow from the repeated hash structure and clears the + * action payload. + * + * Return: negative value on error, 0 if removed successfully. + */ +static int +nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_flower_offload *flow) +{ + struct nfp_fl_payload *nfp_flow; + int err; + + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie); + if (!nfp_flow) + return -ENOENT; + + err = nfp_modify_flow_metadata(app, nfp_flow); + if (err) + goto err_free_flow; + + err = nfp_flower_xmit_flow(netdev, nfp_flow, + NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + if (err) + goto err_free_flow; + +err_free_flow: + hash_del_rcu(&nfp_flow->link); + kfree(nfp_flow->action_data); + kfree(nfp_flow->mask_data); + kfree(nfp_flow->unmasked_data); + kfree_rcu(nfp_flow, rcu); + return err; +} + +/** + * nfp_flower_get_stats() - Populates flow stats obtained from hardware. + * @app: Pointer to the APP handle + * @flow: TC flower classifier offload structure + * + * Populates a flow statistics structure which which corresponds to a + * specific flow. + * + * Return: negative value on error, 0 if stats populated successfully. + */ +static int +nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow) +{ + struct nfp_fl_payload *nfp_flow; + + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie); + if (!nfp_flow) + return -EINVAL; + + spin_lock_bh(&nfp_flow->lock); + tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes, + nfp_flow->stats.pkts, nfp_flow->stats.used); + + nfp_flow->stats.pkts = 0; + nfp_flow->stats.bytes = 0; + spin_unlock_bh(&nfp_flow->lock); + + return 0; +} + +static int +nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_flower_offload *flower) +{ + switch (flower->command) { + case TC_CLSFLOWER_REPLACE: + return nfp_flower_add_offload(app, netdev, flower); + case TC_CLSFLOWER_DESTROY: + return nfp_flower_del_offload(app, netdev, flower); + case TC_CLSFLOWER_STATS: + return nfp_flower_get_stats(app, flower); + } + + return -EOPNOTSUPP; +} + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + u32 handle, __be16 proto, struct tc_to_netdev *tc) +{ + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -EOPNOTSUPP; + + if (!eth_proto_is_802_3(proto)) + return -EOPNOTSUPP; + + if (tc->type != TC_SETUP_CLSFLOWER) + return -EINVAL; + + return nfp_flower_repr_offload(app, netdev, tc->cls_flower); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 5620de05c996..c704c022574f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -43,7 +43,9 @@ static const struct nfp_app_type *apps[] = { &app_nic, &app_bpf, +#ifdef CONFIG_NFP_APP_FLOWER &app_flower, +#endif }; const char *nfp_app_mip_name(struct nfp_app *app) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index ae2d02753d1a..5d714e10d9a9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -47,6 +47,7 @@ struct sk_buff; struct nfp_app; struct nfp_cpp; struct nfp_pf; +struct nfp_repr; struct nfp_net; enum nfp_app_id { @@ -66,10 +67,13 @@ extern const struct nfp_app_type app_flower; * @ctrl_has_meta: control messages have prepend of type:5/port:CTRL * * Callbacks - * @init: perform basic app checks + * @init: perform basic app checks and init + * @clean: clean app state * @extra_cap: extra capabilities string * @vnic_init: init vNICs (assign port types, etc.) * @vnic_clean: clean up app's vNIC state + * @repr_open: representor netdev open callback + * @repr_stop: representor netdev stop callback * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler @@ -88,6 +92,7 @@ struct nfp_app_type { bool ctrl_has_meta; int (*init)(struct nfp_app *app); + void (*clean)(struct nfp_app *app); const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn); @@ -95,6 +100,9 @@ struct nfp_app_type { unsigned int id); void (*vnic_clean)(struct nfp_app *app, struct nfp_net *nn); + int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr); + int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr); + int (*start)(struct nfp_app *app); void (*stop)(struct nfp_app *app); @@ -144,6 +152,12 @@ static inline int nfp_app_init(struct nfp_app *app) return app->type->init(app); } +static inline void nfp_app_clean(struct nfp_app *app) +{ + if (app->type->clean) + app->type->clean(app); +} + static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn, unsigned int id) { @@ -156,6 +170,20 @@ static inline void nfp_app_vnic_clean(struct nfp_app *app, struct nfp_net *nn) app->type->vnic_clean(app, nn); } +static inline int nfp_app_repr_open(struct nfp_app *app, struct nfp_repr *repr) +{ + if (!app->type->repr_open) + return -EINVAL; + return app->type->repr_open(app, repr); +} + +static inline int nfp_app_repr_stop(struct nfp_app *app, struct nfp_repr *repr) +{ + if (!app->type->repr_stop) + return -EINVAL; + return app->type->repr_stop(app, repr); +} + static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl) { app->ctrl = ctrl; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 748e54cc885e..d67969d3e484 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -107,17 +107,18 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) goto err_unlock; } - err = nfp_app_sriov_enable(pf->app, num_vfs); + err = pci_enable_sriov(pdev, num_vfs); if (err) { - dev_warn(&pdev->dev, "App specific PCI sriov configuration failed: %d\n", - err); + dev_warn(&pdev->dev, "Failed to enable PCI SR-IOV: %d\n", err); goto err_unlock; } - err = pci_enable_sriov(pdev, num_vfs); + err = nfp_app_sriov_enable(pf->app, num_vfs); if (err) { - dev_warn(&pdev->dev, "Failed to enable PCI sriov: %d\n", err); - goto err_app_sriov_disable; + dev_warn(&pdev->dev, + "App specific PCI SR-IOV configuration failed: %d\n", + err); + goto err_sriov_disable; } pf->num_vfs = num_vfs; @@ -127,8 +128,8 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) mutex_unlock(&pf->lock); return num_vfs; -err_app_sriov_disable: - nfp_app_sriov_disable(pf->app); +err_sriov_disable: + pci_disable_sriov(pdev); err_unlock: mutex_unlock(&pf->lock); return err; @@ -136,17 +137,20 @@ err_unlock: return 0; } -static int __nfp_pcie_sriov_disable(struct pci_dev *pdev) +static int nfp_pcie_sriov_disable(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct nfp_pf *pf = pci_get_drvdata(pdev); + mutex_lock(&pf->lock); + /* If the VFs are assigned we cannot shut down SR-IOV without * causing issues, so just leave the hardware available but * disabled */ if (pci_vfs_assigned(pdev)) { dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n"); + mutex_unlock(&pf->lock); return -EPERM; } @@ -156,20 +160,10 @@ static int __nfp_pcie_sriov_disable(struct pci_dev *pdev) pci_disable_sriov(pdev); dev_dbg(&pdev->dev, "Removed VFs.\n"); -#endif - return 0; -} - -static int nfp_pcie_sriov_disable(struct pci_dev *pdev) -{ - struct nfp_pf *pf = pci_get_drvdata(pdev); - int err; - mutex_lock(&pf->lock); - err = __nfp_pcie_sriov_disable(pdev); mutex_unlock(&pf->lock); - - return err; +#endif + return 0; } static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs) @@ -382,6 +376,12 @@ static int nfp_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, pf); pf->pdev = pdev; + pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev)); + if (!pf->wq) { + err = -ENOMEM; + goto err_pci_priv_unset; + } + pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev); if (IS_ERR_OR_NULL(pf->cpp)) { err = PTR_ERR(pf->cpp); @@ -414,6 +414,14 @@ static int nfp_pci_probe(struct pci_dev *pdev, if (err) goto err_fw_unload; + pf->num_vfs = pci_num_vf(pdev); + if (pf->num_vfs > pf->limit_vfs) { + dev_err(&pdev->dev, + "Error: %d VFs already enabled, but loaded FW can only support %d\n", + pf->num_vfs, pf->limit_vfs); + goto err_fw_unload; + } + err = nfp_net_pci_probe(pf); if (err) goto err_sriov_unlimit; @@ -443,6 +451,8 @@ err_hwinfo_free: kfree(pf->hwinfo); nfp_cpp_free(pf->cpp); err_disable_msix: + destroy_workqueue(pf->wq); +err_pci_priv_unset: pci_set_drvdata(pdev, NULL); mutex_destroy(&pf->lock); devlink_free(devlink); @@ -463,11 +473,11 @@ static void nfp_pci_remove(struct pci_dev *pdev) devlink = priv_to_devlink(pf); + nfp_net_pci_remove(pf); + nfp_pcie_sriov_disable(pdev); pci_sriov_set_totalvfs(pf->pdev, 0); - nfp_net_pci_remove(pf); - devlink_unregister(devlink); kfree(pf->rtbl); @@ -475,6 +485,7 @@ static void nfp_pci_remove(struct pci_dev *pdev) if (pf->fw_loaded) nfp_fw_unload(pf); + destroy_workqueue(pf->wq); pci_set_drvdata(pdev, NULL); kfree(pf->hwinfo); nfp_cpp_free(pf->cpp); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index edc14dc78674..a08cfba7e68e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -89,6 +89,7 @@ struct nfp_rtsym_table; * @num_vnics: Number of vNICs spawned * @vnics: Linked list of vNIC structures (struct nfp_net) * @ports: Linked list of port structures (struct nfp_port) + * @wq: Workqueue for running works which need to grab @lock * @port_refresh_work: Work entry for taking netdevs out * @lock: Protects all fields which may change after probe */ @@ -131,7 +132,10 @@ struct nfp_pf { struct list_head vnics; struct list_head ports; + + struct workqueue_struct *wq; struct work_struct port_refresh_work; + struct mutex lock; }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 2e728543e840..30f82b41d400 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -64,6 +64,7 @@ #include <linux/vmalloc.h> #include <linux/ktime.h> +#include <net/switchdev.h> #include <net/vxlan.h> #include "nfpcore/nfp_nsp.h" @@ -3096,18 +3097,6 @@ static void nfp_net_stat64(struct net_device *netdev, } } -static int -nfp_net_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index, - __be16 proto, struct tc_to_netdev *tc) -{ - struct nfp_net *nn = netdev_priv(netdev); - - if (chain_index) - return -EOPNOTSUPP; - - return nfp_app_setup_tc(nn->app, netdev, handle, proto, tc); -} - static int nfp_net_set_features(struct net_device *netdev, netdev_features_t features) { @@ -3423,7 +3412,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_get_stats64 = nfp_net_stat64, .ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, - .ndo_setup_tc = nfp_net_setup_tc, + .ndo_setup_tc = nfp_port_setup_tc, .ndo_tx_timeout = nfp_net_tx_timeout, .ndo_set_rx_mode = nfp_net_set_rx_mode, .ndo_change_mtu = nfp_net_change_mtu, @@ -3703,6 +3692,8 @@ static void nfp_net_netdev_init(struct nfp_net *nn) netdev->netdev_ops = &nfp_net_netdev_ops; netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops); + /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = nn->max_mtu; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index cfcbc3b9a9aa..c85a2f18c4df 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -81,58 +81,6 @@ static int nfp_is_ready(struct nfp_pf *pf) } /** - * nfp_net_map_area() - Help function to map an area - * @cpp: NFP CPP handler - * @name: Name for the area - * @target: CPP target - * @addr: CPP address - * @size: Size of the area - * @area: Area handle (returned). - * - * This function is primarily to simplify the code in the main probe - * function. To undo the effect of this functions call - * @nfp_cpp_area_release_free(*area); - * - * Return: Pointer to memory mapped area or ERR_PTR - */ -static u8 __iomem *nfp_net_map_area(struct nfp_cpp *cpp, - const char *name, int isl, int target, - unsigned long long addr, unsigned long size, - struct nfp_cpp_area **area) -{ - u8 __iomem *res; - u32 dest; - int err; - - dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, isl); - - *area = nfp_cpp_area_alloc_with_name(cpp, dest, name, addr, size); - if (!*area) { - err = -EIO; - goto err_area; - } - - err = nfp_cpp_area_acquire(*area); - if (err < 0) - goto err_acquire; - - res = nfp_cpp_area_iomem(*area); - if (!res) { - err = -EIO; - goto err_map; - } - - return res; - -err_map: - nfp_cpp_area_release(*area); -err_acquire: - nfp_cpp_area_free(*area); -err_area: - return (u8 __iomem *)ERR_PTR(err); -} - -/** * nfp_net_get_mac_addr() - Get the MAC address. * @pf: NFP PF handle * @port: NFP port structure @@ -226,31 +174,12 @@ static u8 __iomem * nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, unsigned int min_size, struct nfp_cpp_area **area) { - const struct nfp_rtsym *sym; char pf_symbol[256]; - u8 __iomem *mem; snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt, nfp_cppcore_pcie_unit(pf->cpp)); - sym = nfp_rtsym_lookup(pf->rtbl, pf_symbol); - if (!sym) - return (u8 __iomem *)ERR_PTR(-ENOENT); - - if (sym->size < min_size) { - nfp_err(pf->cpp, "PF symbol %s too small\n", pf_symbol); - return (u8 __iomem *)ERR_PTR(-EINVAL); - } - - mem = nfp_net_map_area(pf->cpp, name, sym->domain, sym->target, - sym->addr, sym->size, area); - if (IS_ERR(mem)) { - nfp_err(pf->cpp, "Failed to map PF symbol %s: %ld\n", - pf_symbol, PTR_ERR(mem)); - return mem; - } - - return mem; + return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area); } static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn) @@ -485,7 +414,7 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) if (IS_ERR(ctrl_bar)) { nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n"); err = PTR_ERR(ctrl_bar); - goto err_free; + goto err_app_clean; } pf->ctrl_vnic = nfp_net_pf_alloc_vnic(pf, false, ctrl_bar, qc_bar, @@ -499,8 +428,11 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) err_unmap: nfp_cpp_area_release_free(pf->ctrl_vnic_bar); +err_app_clean: + nfp_app_clean(pf->app); err_free: nfp_app_free(pf->app); + pf->app = NULL; return err; } @@ -510,6 +442,7 @@ static void nfp_net_pf_app_clean(struct nfp_pf *pf) nfp_net_pf_free_vnic(pf, pf->ctrl_vnic); nfp_cpp_area_release_free(pf->ctrl_vnic_bar); } + nfp_app_clean(pf->app); nfp_app_free(pf->app); pf->app = NULL; } @@ -555,8 +488,16 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf) if (err) goto err_ctrl_stop; + if (pf->num_vfs) { + err = nfp_app_sriov_enable(pf->app, pf->num_vfs); + if (err) + goto err_app_stop; + } + return 0; +err_app_stop: + nfp_app_stop(pf->app); err_ctrl_stop: nfp_net_pf_app_stop_ctrl(pf); return err; @@ -564,6 +505,8 @@ err_ctrl_stop: static void nfp_net_pf_app_stop(struct nfp_pf *pf) { + if (pf->num_vfs) + nfp_app_sriov_disable(pf->app); nfp_app_stop(pf->app); nfp_net_pf_app_stop_ctrl(pf); } @@ -580,26 +523,22 @@ static void nfp_net_pci_unmap_mem(struct nfp_pf *pf) static int nfp_net_pci_map_mem(struct nfp_pf *pf) { - u32 ctrl_bar_sz; u8 __iomem *mem; + u32 min_size; int err; - ctrl_bar_sz = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE; + min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE; mem = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%d_net_bar0", - ctrl_bar_sz, &pf->data_vnic_bar); + min_size, &pf->data_vnic_bar); if (IS_ERR(mem)) { nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n"); - err = PTR_ERR(mem); - if (!pf->fw_loaded && err == -ENOENT) - err = -EPROBE_DEFER; - return err; + return PTR_ERR(mem); } - pf->mac_stats_mem = nfp_net_pf_map_rtsym(pf, "net.macstats", - "_mac_stats", - NFP_MAC_STATS_SIZE * - (pf->eth_tbl->max_index + 1), - &pf->mac_stats_bar); + min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1); + pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats", + "net.macstats", min_size, + &pf->mac_stats_bar); if (IS_ERR(pf->mac_stats_mem)) { if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) { err = PTR_ERR(pf->mac_stats_mem); @@ -620,7 +559,7 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) pf->vf_cfg_mem = NULL; } - mem = nfp_net_map_area(pf->cpp, "net.qc", 0, 0, + mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0, NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ, &pf->qc_area); if (IS_ERR(mem)) { @@ -743,7 +682,7 @@ void nfp_net_refresh_port_table(struct nfp_port *port) set_bit(NFP_PORT_CHANGED, &port->flags); - schedule_work(&pf->port_refresh_work); + queue_work(pf->wq, &pf->port_refresh_work); } int nfp_net_refresh_eth_port(struct nfp_port *port) @@ -786,6 +725,12 @@ int nfp_net_pci_probe(struct nfp_pf *pf) return -EINVAL; } + if (!pf->rtbl) { + nfp_err(pf->cpp, "No %s, giving up.\n", + pf->fw_loaded ? "symbol table" : "firmware found"); + return -EPROBE_DEFER; + } + mutex_lock(&pf->lock); pf->max_data_vnics = nfp_net_pf_get_num_ports(pf); if ((int)pf->max_data_vnics < 0) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 44adcc5df11e..8ec5474f4b18 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -35,8 +35,10 @@ #include <linux/io-64-nonatomic-hi-lo.h> #include <linux/lockdep.h> #include <net/dst_metadata.h> +#include <net/switchdev.h> #include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nsp.h" #include "nfp_app.h" #include "nfp_main.h" #include "nfp_net_ctrl.h" @@ -135,25 +137,34 @@ nfp_repr_pf_get_stats64(const struct nfp_app *app, u8 pf, stats->rx_dropped = readq(mem + NFP_NET_CFG_STATS_TX_DISCARDS); } -void -nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type, - u8 port, struct rtnl_link_stats64 *stats) +static void +nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { - switch (type) { - case NFP_REPR_TYPE_PHYS_PORT: - nfp_repr_phy_port_get_stats64(app, port, stats); + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_eth_table_port *eth_port; + struct nfp_app *app = repr->app; + + if (WARN_ON(!repr->port)) + return; + + switch (repr->port->type) { + case NFP_PORT_PHYS_PORT: + eth_port = __nfp_port_get_eth_port(repr->port); + if (!eth_port) + break; + nfp_repr_phy_port_get_stats64(app, eth_port->index, stats); break; - case NFP_REPR_TYPE_PF: - nfp_repr_pf_get_stats64(app, port, stats); + case NFP_PORT_PF_PORT: + nfp_repr_pf_get_stats64(app, repr->port->pf_id, stats); break; - case NFP_REPR_TYPE_VF: - nfp_repr_vf_get_stats64(app, port, stats); + case NFP_PORT_VF_PORT: + nfp_repr_vf_get_stats64(app, repr->port->vf_id, stats); default: break; } } -bool +static bool nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id) { switch (attr_id) { @@ -196,8 +207,9 @@ nfp_repr_get_host_stats64(const struct net_device *netdev, return 0; } -int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, - void *stats) +static int +nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, + void *stats) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -207,7 +219,7 @@ int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } -netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); unsigned int len = skb->len; @@ -224,6 +236,31 @@ netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) return ret; } +static int nfp_repr_stop(struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + return nfp_app_repr_stop(repr->app, repr); +} + +static int nfp_repr_open(struct net_device *netdev) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + return nfp_app_repr_open(repr->app, repr); +} + +const struct net_device_ops nfp_repr_netdev_ops = { + .ndo_open = nfp_repr_open, + .ndo_stop = nfp_repr_stop, + .ndo_start_xmit = nfp_repr_xmit, + .ndo_get_stats64 = nfp_repr_get_stats64, + .ndo_has_offload_stats = nfp_repr_has_offload_stats, + .ndo_get_offload_stats = nfp_repr_get_offload_stats, + .ndo_get_phys_port_name = nfp_port_get_phys_port_name, + .ndo_setup_tc = nfp_port_setup_tc, +}; + static void nfp_repr_clean(struct nfp_repr *repr) { unregister_netdev(repr->netdev); @@ -248,8 +285,8 @@ static void nfp_repr_set_lockdep_class(struct net_device *dev) } int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, - const struct net_device_ops *netdev_ops, u32 cmsg_port_id, - struct nfp_port *port, struct net_device *pf_netdev) + u32 cmsg_port_id, struct nfp_port *port, + struct net_device *pf_netdev) { struct nfp_repr *repr = netdev_priv(netdev); int err; @@ -263,7 +300,13 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, repr->dst->u.port_info.port_id = cmsg_port_id; repr->dst->u.port_info.lower_dev = pf_netdev; - netdev->netdev_ops = netdev_ops; + netdev->netdev_ops = &nfp_repr_netdev_ops; + SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops); + + if (nfp_app_has_tc(app)) { + netdev->features |= NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_HW_TC; + } err = register_netdev(netdev); if (err) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index c5ed6611f708..32179cad062a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -38,6 +38,8 @@ struct metadata_dst; struct nfp_net; struct nfp_port; +#include <net/dst_metadata.h> + /** * struct nfp_reprs - container for representor netdevs * @num_reprs: Number of elements in reprs array @@ -97,16 +99,22 @@ enum nfp_repr_type { }; #define NFP_REPR_TYPE_MAX (__NFP_REPR_TYPE_MAX - 1) +extern const struct net_device_ops nfp_repr_netdev_ops; + +static inline bool nfp_netdev_is_nfp_repr(struct net_device *netdev) +{ + return netdev->netdev_ops == &nfp_repr_netdev_ops; +} + +static inline int nfp_repr_get_port_id(struct net_device *netdev) +{ + struct nfp_repr *priv = netdev_priv(netdev); + + return priv->dst->u.port_info.port_id; +} + void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len); -void -nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type, - u8 port, struct rtnl_link_stats64 *stats); -bool nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id); -int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, - void *stats); -netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev); int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, - const struct net_device_ops *netdev_ops, u32 cmsg_port_id, struct nfp_port *port, struct net_device *pf_netdev); struct net_device *nfp_repr_alloc(struct nfp_app *app); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c index 19bceeb82225..776e54dd5dd0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -32,6 +32,7 @@ */ #include <linux/lockdep.h> +#include <net/switchdev.h> #include "nfpcore/nfp_cpp.h" #include "nfpcore/nfp_nsp.h" @@ -42,13 +43,64 @@ struct nfp_port *nfp_port_from_netdev(struct net_device *netdev) { - struct nfp_net *nn; + if (nfp_netdev_is_nfp_net(netdev)) { + struct nfp_net *nn = netdev_priv(netdev); - if (WARN_ON(!nfp_netdev_is_nfp_net(netdev))) - return NULL; - nn = netdev_priv(netdev); + return nn->port; + } + + if (nfp_netdev_is_nfp_repr(netdev)) { + struct nfp_repr *repr = netdev_priv(netdev); + + return repr->port; + } - return nn->port; + WARN(1, "Unknown netdev type for nfp_port\n"); + + return NULL; +} + +static int +nfp_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr) +{ + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: { + const u8 *serial; + /* N.B: attr->u.ppid.id is binary data */ + attr->u.ppid.id_len = nfp_cpp_serial(port->app->cpp, &serial); + memcpy(&attr->u.ppid.id, serial, attr->u.ppid.id_len); + break; + } + default: + return -EOPNOTSUPP; + } + + return 0; +} + +const struct switchdev_ops nfp_port_switchdev_ops = { + .switchdev_port_attr_get = nfp_port_attr_get, +}; + +int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index, + __be16 proto, struct tc_to_netdev *tc) +{ + struct nfp_port *port; + + if (chain_index) + return -EOPNOTSUPP; + + port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + + return nfp_app_setup_tc(port->app, netdev, handle, proto, tc); } struct nfp_port * @@ -98,15 +150,31 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len) int n; port = nfp_port_from_netdev(netdev); - eth_port = __nfp_port_get_eth_port(port); - if (!eth_port) + if (!port) + return -EOPNOTSUPP; + + switch (port->type) { + case NFP_PORT_PHYS_PORT: + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!eth_port->is_split) + n = snprintf(name, len, "p%d", eth_port->label_port); + else + n = snprintf(name, len, "p%ds%d", eth_port->label_port, + eth_port->label_subport); + break; + case NFP_PORT_PF_PORT: + n = snprintf(name, len, "pf%d", port->pf_id); + break; + case NFP_PORT_VF_PORT: + n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id); + break; + default: return -EOPNOTSUPP; + } - if (!eth_port->is_split) - n = snprintf(name, len, "p%d", eth_port->label_port); - else - n = snprintf(name, len, "p%ds%d", eth_port->label_port, - eth_port->label_subport); if (n >= len) return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index f472bea4ec2b..a33d22e18f94 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -36,6 +36,7 @@ #include <net/devlink.h> +struct tc_to_netdev; struct net_device; struct nfp_app; struct nfp_pf; @@ -47,10 +48,14 @@ struct nfp_port; * state when port disappears because of FW fault or config * change * @NFP_PORT_PHYS_PORT: external NIC port + * @NFP_PORT_PF_PORT: logical port of PCI PF + * @NFP_PORT_VF_PORT: logical port of PCI VF */ enum nfp_port_type { NFP_PORT_INVALID, NFP_PORT_PHYS_PORT, + NFP_PORT_PF_PORT, + NFP_PORT_VF_PORT, }; /** @@ -72,6 +77,8 @@ enum nfp_port_flags { * @dl_port: devlink port structure * @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme * @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry + * @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3) + * @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id * @port_list: entry on pf's list of ports */ struct nfp_port { @@ -84,12 +91,27 @@ struct nfp_port { struct devlink_port dl_port; - unsigned int eth_id; - struct nfp_eth_table_port *eth_port; + union { + /* NFP_PORT_PHYS_PORT */ + struct { + unsigned int eth_id; + struct nfp_eth_table_port *eth_port; + }; + /* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */ + struct { + unsigned int pf_id; + unsigned int vf_id; + }; + }; struct list_head port_list; }; +extern const struct switchdev_ops nfp_port_switchdev_ops; + +int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index, + __be16 proto, struct tc_to_netdev *tc); + struct nfp_port *nfp_port_from_netdev(struct net_device *netdev); struct nfp_port * nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index 25a967158ce9..5798adc57cbc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -230,6 +230,9 @@ struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp, struct nfp_cpp_area *nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 cpp_id, unsigned long long address, unsigned long size); +struct nfp_cpp_area * +nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 cpp_id, + unsigned long long address, unsigned long size); void nfp_cpp_area_free(struct nfp_cpp_area *area); int nfp_cpp_area_acquire(struct nfp_cpp_area *area); int nfp_cpp_area_acquire_nonblocking(struct nfp_cpp_area *area); @@ -239,8 +242,6 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset, void *buffer, size_t length); int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset, const void *buffer, size_t length); -int nfp_cpp_area_check_range(struct nfp_cpp_area *area, - unsigned long long offset, unsigned long size); const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area); void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area); struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area); @@ -278,6 +279,10 @@ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id, int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id, unsigned long long address, u64 value); +u8 __iomem * +nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target, + u64 addr, unsigned long size, struct nfp_cpp_area **area); + struct nfp_cpp_mutex; int nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 9b69dcf87be9..04dd5758ecf5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -361,6 +361,41 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest, } /** + * nfp_cpp_area_alloc_acquire() - allocate a new CPP area and lock it down + * @cpp: CPP handle + * @name: Name of region + * @dest: CPP id + * @address: Start address on CPP target + * @size: Size of area + * + * Allocate and initialize a CPP area structure, and lock it down so + * that it can be accessed directly. + * + * NOTE: @address and @size must be 32-bit aligned values. + * + * NOTE: The area must also be 'released' when the structure is freed. + * + * Return: NFP CPP Area handle, or NULL + */ +struct nfp_cpp_area * +nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 dest, + unsigned long long address, unsigned long size) +{ + struct nfp_cpp_area *area; + + area = nfp_cpp_area_alloc_with_name(cpp, dest, name, address, size); + if (!area) + return NULL; + + if (nfp_cpp_area_acquire(area)) { + nfp_cpp_area_free(area); + return NULL; + } + + return area; +} + +/** * nfp_cpp_area_free() - free up the CPP area * @area: CPP area handle * @@ -536,27 +571,6 @@ int nfp_cpp_area_write(struct nfp_cpp_area *area, } /** - * nfp_cpp_area_check_range() - check if address range fits in CPP area - * @area: CPP area handle - * @offset: offset into CPP target - * @length: size of address range in bytes - * - * Check if address range fits within CPP area. Return 0 if area - * fits or -EFAULT on error. - * - * Return: 0, or -ERRNO - */ -int nfp_cpp_area_check_range(struct nfp_cpp_area *area, - unsigned long long offset, unsigned long length) -{ - if (offset < area->offset || - offset + length > area->offset + area->size) - return -EFAULT; - - return 0; -} - -/** * nfp_cpp_area_name() - return name of a CPP area * @cpp_area: CPP area handle * diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c index 0ba0379b8f75..ab86bceb93f2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c @@ -279,3 +279,43 @@ exit_release: return err; } + +/** + * nfp_cpp_map_area() - Helper function to map an area + * @cpp: NFP CPP handler + * @name: Name for the area + * @domain: CPP domain + * @target: CPP target + * @addr: CPP address + * @size: Size of the area + * @area: Area handle (output) + * + * Map an area of IOMEM access. To undo the effect of this function call + * @nfp_cpp_area_release_free(*area). + * + * Return: Pointer to memory mapped area or ERR_PTR + */ +u8 __iomem * +nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target, + u64 addr, unsigned long size, struct nfp_cpp_area **area) +{ + u8 __iomem *res; + u32 dest; + + dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, domain); + + *area = nfp_cpp_area_alloc_acquire(cpp, name, dest, addr, size); + if (!*area) + goto err_eio; + + res = nfp_cpp_area_iomem(*area); + if (!res) + goto err_release_free; + + return res; + +err_release_free: + nfp_cpp_area_release_free(*area); +err_eio: + return (u8 __iomem *)ERR_PTR(-EIO); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h index d27d29782a12..c9724fb7ea4b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h @@ -97,7 +97,11 @@ int nfp_rtsym_count(struct nfp_rtsym_table *rtbl); const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx); const struct nfp_rtsym * nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name); + u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, int *error); +u8 __iomem * +nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id, + unsigned int min_size, struct nfp_cpp_area **area); #endif /* NFP_NFFW_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c index 203f9cbae0fb..ecda474ac7c3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c @@ -289,3 +289,30 @@ exit: return ~0ULL; return val; } + +u8 __iomem * +nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id, + unsigned int min_size, struct nfp_cpp_area **area) +{ + const struct nfp_rtsym *sym; + u8 __iomem *mem; + + sym = nfp_rtsym_lookup(rtbl, name); + if (!sym) + return (u8 __iomem *)ERR_PTR(-ENOENT); + + if (sym->size < min_size) { + nfp_err(rtbl->cpp, "Symbol %s too small\n", name); + return (u8 __iomem *)ERR_PTR(-EINVAL); + } + + mem = nfp_cpp_map_area(rtbl->cpp, id, sym->domain, sym->target, + sym->addr, sym->size, area); + if (IS_ERR(mem)) { + nfp_err(rtbl->cpp, "Failed to map symbol %s: %ld\n", + name, PTR_ERR(mem)); + return mem; + } + + return mem; +} diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c index e30676515529..6cec2a6a3dcc 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c @@ -174,7 +174,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter) { int err = 0, i; u32 *template, *tmp_buf; - struct netxen_minidump_template_hdr *hdr; err = netxen_get_minidump_template_size(adapter); if (err) { adapter->mdump.fw_supports_md = 0; @@ -218,8 +217,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter) template = (u32 *) adapter->mdump.md_template; for (i = 0; i < adapter->mdump.md_template_size/sizeof(u32); i++) *template++ = __le32_to_cpu(*tmp_buf++); - hdr = (struct netxen_minidump_template_hdr *) - adapter->mdump.md_template; adapter->mdump.md_capture_buff = NULL; adapter->mdump.fw_supports_md = 1; adapter->mdump.md_enabled = 0; diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 67452380b60e..82dd47068e18 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -5,6 +5,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o -qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o +qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o qed-$(CONFIG_QED_FCOE) += qed_fcoe.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 14b08ee9e3ad..91003bc6f00b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -210,14 +210,16 @@ struct qed_tunn_update_params { /* The PCI personality is not quite synonymous to protocol ID: * 1. All personalities need CORE connections - * 2. The Ethernet personality may support also the RoCE protocol + * 2. The Ethernet personality may support also the RoCE/iWARP protocol */ enum qed_pci_personality { QED_PCI_ETH, QED_PCI_FCOE, QED_PCI_ISCSI, QED_PCI_ETH_ROCE, - QED_PCI_DEFAULT /* default in shmem */ + QED_PCI_ETH_IWARP, + QED_PCI_ETH_RDMA, + QED_PCI_DEFAULT, /* default in shmem */ }; /* All VFs are symmetric, all counters are PF + all VFs */ @@ -277,6 +279,7 @@ enum qed_dev_cap { QED_DEV_CAP_FCOE, QED_DEV_CAP_ISCSI, QED_DEV_CAP_ROCE, + QED_DEV_CAP_IWARP, }; enum qed_wol_support { @@ -286,7 +289,24 @@ enum qed_wol_support { struct qed_hw_info { /* PCI personality */ - enum qed_pci_personality personality; + enum qed_pci_personality personality; +#define QED_IS_RDMA_PERSONALITY(dev) \ + ((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \ + (dev)->hw_info.personality == QED_PCI_ETH_IWARP || \ + (dev)->hw_info.personality == QED_PCI_ETH_RDMA) +#define QED_IS_ROCE_PERSONALITY(dev) \ + ((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \ + (dev)->hw_info.personality == QED_PCI_ETH_RDMA) +#define QED_IS_IWARP_PERSONALITY(dev) \ + ((dev)->hw_info.personality == QED_PCI_ETH_IWARP || \ + (dev)->hw_info.personality == QED_PCI_ETH_RDMA) +#define QED_IS_L2_PERSONALITY(dev) \ + ((dev)->hw_info.personality == QED_PCI_ETH || \ + QED_IS_RDMA_PERSONALITY(dev)) +#define QED_IS_FCOE_PERSONALITY(dev) \ + ((dev)->hw_info.personality == QED_PCI_FCOE) +#define QED_IS_ISCSI_PERSONALITY(dev) \ + ((dev)->hw_info.personality == QED_PCI_ISCSI) /* Resource Allocation scheme results */ u32 resc_start[QED_MAX_RESC]; @@ -759,7 +779,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev, } #define PURE_LB_TC 8 -#define OOO_LB_TC 9 +#define PKT_LB_TC 9 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate); void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, @@ -769,6 +789,8 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev, void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); int qed_device_num_engines(struct qed_dev *cdev); int qed_device_get_port_id(struct qed_dev *cdev); +void qed_set_fw_mac_addr(__le16 *fw_msb, + __le16 *fw_mid, __le16 *fw_lsb, u8 *mac); #define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index e201214764db..af106be8cc08 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -246,14 +246,16 @@ struct qed_cxt_mngr { static bool src_proto(enum protocol_type type) { return type == PROTOCOLID_ISCSI || - type == PROTOCOLID_FCOE; + type == PROTOCOLID_FCOE || + type == PROTOCOLID_IWARP; } static bool tm_cid_proto(enum protocol_type type) { return type == PROTOCOLID_ISCSI || type == PROTOCOLID_FCOE || - type == PROTOCOLID_ROCE; + type == PROTOCOLID_ROCE || + type == PROTOCOLID_IWARP; } static bool tm_tid_proto(enum protocol_type type) @@ -853,7 +855,7 @@ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines) if (!excess_lines) return 0; - if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_RDMA_PERSONALITY(p_hwfn)) return 0; p_mngr = p_hwfn->p_cxt_mngr; @@ -1033,7 +1035,7 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, u32 lines, line, sz_left, lines_to_skip = 0; /* Special handling for RoCE that supports dynamic allocation */ - if ((p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) && + if (QED_IS_RDMA_PERSONALITY(p_hwfn) && ((ilt_client == ILT_CLI_CDUT) || ilt_client == ILT_CLI_TSDM)) return 0; @@ -1833,7 +1835,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn) tm_offset += tm_iids.pf_tids[i]; } - if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) + if (QED_IS_RDMA_PERSONALITY(p_hwfn)) active_seg_mask = 0; STORE_RT_REG(p_hwfn, TM_REG_PF_ENABLE_TASK_RT_OFFSET, active_seg_mask); @@ -2068,6 +2070,11 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs); switch (p_hwfn->hw_info.personality) { + case QED_PCI_ETH_IWARP: + /* Each QP requires one connection */ + num_cons = min_t(u32, IWARP_MAX_QPS, p_params->num_qps); + proto = PROTOCOLID_IWARP; + break; case QED_PCI_ETH_ROCE: num_qps = min_t(u32, ROCE_MAX_QPS, p_params->num_qps); num_cons = num_qps * 2; /* each QP requires two connections */ @@ -2103,6 +2110,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks) qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0); switch (p_hwfn->hw_info.personality) { + case QED_PCI_ETH_RDMA: + case QED_PCI_ETH_IWARP: case QED_PCI_ETH_ROCE: { qed_rdma_set_pf_params(p_hwfn, @@ -2344,7 +2353,7 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, last_cid_allocated - 1); if (!p_hwfn->b_rdma_enabled_in_prs) { - /* Enable RoCE search */ + /* Enable RDMA search */ qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1); p_hwfn->b_rdma_enabled_in_prs = true; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 49667ad9042d..6c87bed13bd2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -216,6 +216,10 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn) case QED_PCI_ETH_ROCE: flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT; break; + case QED_PCI_ETH_IWARP: + flags |= PQ_FLAGS_MCOS | PQ_FLAGS_ACK | PQ_FLAGS_OOO | + PQ_FLAGS_OFLD; + break; default: DP_ERR(p_hwfn, "unknown personality %d\n", p_hwfn->hw_info.personality); @@ -936,9 +940,16 @@ int qed_resc_alloc(struct qed_dev *cdev) /* EQ */ n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain); - if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + if (QED_IS_RDMA_PERSONALITY(p_hwfn)) { + enum protocol_type rdma_proto; + + if (QED_IS_ROCE_PERSONALITY(p_hwfn)) + rdma_proto = PROTOCOLID_ROCE; + else + rdma_proto = PROTOCOLID_IWARP; + num_cons = qed_cxt_get_proto_cid_count(p_hwfn, - PROTOCOLID_ROCE, + rdma_proto, NULL) * 2; n_eqes += num_cons + 2 * MAX_NUM_VFS_BB; } else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) { @@ -2057,7 +2068,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) qed_int_get_num_sbs(p_hwfn, &sb_cnt); if (IS_ENABLED(CONFIG_QED_RDMA) && - p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + QED_IS_RDMA_PERSONALITY(p_hwfn)) { /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide * the status blocks equally between L2 / RoCE but with * consideration as to how many l2 queues / cnqs we have. @@ -2068,9 +2079,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) non_l2_sbs = feat_num[QED_RDMA_CNQ]; } - - if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE || - p_hwfn->hw_info.personality == QED_PCI_ETH) { + if (QED_IS_L2_PERSONALITY(p_hwfn)) { /* Start by allocating VF queues, then PF's */ feat_num[QED_VF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_L2_QUEUE), @@ -2083,12 +2092,12 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) QED_VF_L2_QUE)); } - if (p_hwfn->hw_info.personality == QED_PCI_FCOE) + if (QED_IS_FCOE_PERSONALITY(p_hwfn)) feat_num[QED_FCOE_CQ] = min_t(u32, sb_cnt.cnt, RESC_NUM(p_hwfn, QED_CMDQS_CQS)); - if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt, RESC_NUM(p_hwfn, QED_CMDQS_CQS)); @@ -4122,3 +4131,14 @@ int qed_device_get_port_id(struct qed_dev *cdev) { return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev); } + +void qed_set_fw_mac_addr(__le16 *fw_msb, + __le16 *fw_mid, __le16 *fw_lsb, u8 *mac) +{ + ((u8 *)fw_msb)[0] = mac[1]; + ((u8 *)fw_msb)[1] = mac[0]; + ((u8 *)fw_mid)[0] = mac[3]; + ((u8 *)fw_mid)[1] = mac[2]; + ((u8 *)fw_lsb)[0] = mac[5]; + ((u8 *)fw_lsb)[1] = mac[4]; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 3bf3614b3084..31fb0bffa098 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -46,6 +46,7 @@ #include <linux/qed/fcoe_common.h> #include <linux/qed/eth_common.h> #include <linux/qed/iscsi_common.h> +#include <linux/qed/iwarp_common.h> #include <linux/qed/rdma_common.h> #include <linux/qed/roce_common.h> #include <linux/qed/qed_fcoe_if.h> diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c new file mode 100644 index 000000000000..5cd20da2d4e0 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -0,0 +1,2409 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/spinlock.h> +#include <linux/tcp.h> +#include "qed_cxt.h" +#include "qed_hw.h" +#include "qed_ll2.h" +#include "qed_rdma.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +#define QED_IWARP_ORD_DEFAULT 32 +#define QED_IWARP_IRD_DEFAULT 32 +#define QED_IWARP_MAX_FW_MSS 4120 + +#define QED_EP_SIG 0xecabcdef + +struct mpa_v2_hdr { + __be16 ird; + __be16 ord; +}; + +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_SEND_RTR 0x4000 /* on ird */ +#define MPA_V2_READ_RTR 0x4000 /* on ord */ +#define MPA_V2_WRITE_RTR 0x8000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + +#define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED) + +#define QED_IWARP_INVALID_TCP_CID 0xffffffff +#define QED_IWARP_RCV_WND_SIZE_DEF (256 * 1024) +#define QED_IWARP_RCV_WND_SIZE_MIN (64 * 1024) +#define TIMESTAMP_HEADER_SIZE (12) + +#define QED_IWARP_TS_EN BIT(0) +#define QED_IWARP_DA_EN BIT(1) +#define QED_IWARP_PARAM_CRC_NEEDED (1) +#define QED_IWARP_PARAM_P2P (1) + +static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn, + u8 fw_event_code, u16 echo, + union event_ring_data *data, + u8 fw_return_code); + +/* Override devinfo with iWARP specific values */ +void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE; + dev->max_qp = min_t(u32, + IWARP_MAX_QPS, + p_hwfn->p_rdma_info->num_qps) - + QED_IWARP_PREALLOC_CNT; + + dev->max_cq = dev->max_qp; + + dev->max_qp_resp_rd_atomic_resc = QED_IWARP_IRD_DEFAULT; + dev->max_qp_req_rd_atomic_resc = QED_IWARP_ORD_DEFAULT; +} + +void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP; + qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1); + p_hwfn->b_rdma_enabled_in_prs = true; +} + +/* We have two cid maps, one for tcp which should be used only from passive + * syn processing and replacing a pre-allocated ep in the list. The second + * for active tcp and for QPs. + */ +static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid) +{ + cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + if (cid < QED_IWARP_PREALLOC_CNT) + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, + cid); + else + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid) +{ + int rc; + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) { + DP_NOTICE(p_hwfn, "Failed in allocating iwarp cid\n"); + return rc; + } + *cid += qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto); + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *cid); + if (rc) + qed_iwarp_cid_cleaned(p_hwfn, *cid); + + return rc; +} + +static void qed_iwarp_set_tcp_cid(struct qed_hwfn *p_hwfn, u32 cid) +{ + cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, cid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +/* This function allocates a cid for passive tcp (called from syn receive) + * the reason it's separate from the regular cid allocation is because it + * is assured that these cids already have ilt allocated. They are preallocated + * to ensure that we won't need to allocate memory during syn processing + */ +static int qed_iwarp_alloc_tcp_cid(struct qed_hwfn *p_hwfn, u32 *cid) +{ + int rc; + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map, cid); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "can't allocate iwarp tcp cid max-count=%d\n", + p_hwfn->p_rdma_info->tcp_cid_map.max_count); + + *cid = QED_IWARP_INVALID_TCP_CID; + return rc; + } + + *cid += qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + return 0; +} + +int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_create_qp_out_params *out_params) +{ + struct iwarp_create_qp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u16 physical_queue; + u32 cid; + int rc; + + qp->shared_queue = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + IWARP_SHARED_QUEUE_PAGE_SIZE, + &qp->shared_queue_phys_addr, + GFP_KERNEL); + if (!qp->shared_queue) + return -ENOMEM; + + out_params->sq_pbl_virt = (u8 *)qp->shared_queue + + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET; + out_params->sq_pbl_phys = qp->shared_queue_phys_addr + + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET; + out_params->rq_pbl_virt = (u8 *)qp->shared_queue + + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET; + out_params->rq_pbl_phys = qp->shared_queue_phys_addr + + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET; + + rc = qed_iwarp_alloc_cid(p_hwfn, &cid); + if (rc) + goto err1; + + qp->icid = (u16)cid; + + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.cid = qp->icid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_CREATE_QP, + PROTOCOLID_IWARP, &init_data); + if (rc) + goto err2; + + p_ramrod = &p_ent->ramrod.iwarp_create_qp; + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, qp->signal_all); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, qp->use_srq); + + p_ramrod->pd = qp->pd; + p_ramrod->sq_num_pages = qp->sq_num_pages; + p_ramrod->rq_num_pages = qp->rq_num_pages; + + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + + p_ramrod->cq_cid_for_sq = + cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id); + p_ramrod->cq_cid_for_rq = + cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id); + + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + p_ramrod->physical_q0 = cpu_to_le16(physical_queue); + physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK); + p_ramrod->physical_q1 = cpu_to_le16(physical_queue); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err2; + + return rc; + +err2: + qed_iwarp_cid_cleaned(p_hwfn, cid); +err1: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + IWARP_SHARED_QUEUE_PAGE_SIZE, + qp->shared_queue, qp->shared_queue_phys_addr); + + return rc; +} + +static int qed_iwarp_modify_fw(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + struct iwarp_modify_qp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MODIFY_QP, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.iwarp_modify_qp; + SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN, + 0x1); + if (qp->iwarp_state == QED_IWARP_QP_STATE_CLOSING) + p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING; + else + p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x)rc=%d\n", qp->icid, rc); + + return rc; +} + +enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state) +{ + switch (state) { + case QED_ROCE_QP_STATE_RESET: + case QED_ROCE_QP_STATE_INIT: + case QED_ROCE_QP_STATE_RTR: + return QED_IWARP_QP_STATE_IDLE; + case QED_ROCE_QP_STATE_RTS: + return QED_IWARP_QP_STATE_RTS; + case QED_ROCE_QP_STATE_SQD: + return QED_IWARP_QP_STATE_CLOSING; + case QED_ROCE_QP_STATE_ERR: + return QED_IWARP_QP_STATE_ERROR; + case QED_ROCE_QP_STATE_SQE: + return QED_IWARP_QP_STATE_TERMINATE; + default: + return QED_IWARP_QP_STATE_ERROR; + } +} + +static enum qed_roce_qp_state +qed_iwarp2roce_state(enum qed_iwarp_qp_state state) +{ + switch (state) { + case QED_IWARP_QP_STATE_IDLE: + return QED_ROCE_QP_STATE_INIT; + case QED_IWARP_QP_STATE_RTS: + return QED_ROCE_QP_STATE_RTS; + case QED_IWARP_QP_STATE_TERMINATE: + return QED_ROCE_QP_STATE_SQE; + case QED_IWARP_QP_STATE_CLOSING: + return QED_ROCE_QP_STATE_SQD; + case QED_IWARP_QP_STATE_ERROR: + return QED_ROCE_QP_STATE_ERR; + default: + return QED_ROCE_QP_STATE_ERR; + } +} + +const char *iwarp_state_names[] = { + "IDLE", + "RTS", + "TERMINATE", + "CLOSING", + "ERROR", +}; + +int +qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + enum qed_iwarp_qp_state new_state, bool internal) +{ + enum qed_iwarp_qp_state prev_iw_state; + bool modify_fw = false; + int rc = 0; + + /* modify QP can be called from upper-layer or as a result of async + * RST/FIN... therefore need to protect + */ + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock); + prev_iw_state = qp->iwarp_state; + + if (prev_iw_state == new_state) { + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock); + return 0; + } + + switch (prev_iw_state) { + case QED_IWARP_QP_STATE_IDLE: + switch (new_state) { + case QED_IWARP_QP_STATE_RTS: + qp->iwarp_state = QED_IWARP_QP_STATE_RTS; + break; + case QED_IWARP_QP_STATE_ERROR: + qp->iwarp_state = QED_IWARP_QP_STATE_ERROR; + if (!internal) + modify_fw = true; + break; + default: + break; + } + break; + case QED_IWARP_QP_STATE_RTS: + switch (new_state) { + case QED_IWARP_QP_STATE_CLOSING: + if (!internal) + modify_fw = true; + + qp->iwarp_state = QED_IWARP_QP_STATE_CLOSING; + break; + case QED_IWARP_QP_STATE_ERROR: + if (!internal) + modify_fw = true; + qp->iwarp_state = QED_IWARP_QP_STATE_ERROR; + break; + default: + break; + } + break; + case QED_IWARP_QP_STATE_ERROR: + switch (new_state) { + case QED_IWARP_QP_STATE_IDLE: + + qp->iwarp_state = new_state; + break; + case QED_IWARP_QP_STATE_CLOSING: + /* could happen due to race... do nothing.... */ + break; + default: + rc = -EINVAL; + } + break; + case QED_IWARP_QP_STATE_TERMINATE: + case QED_IWARP_QP_STATE_CLOSING: + qp->iwarp_state = new_state; + break; + default: + break; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) %s --> %s%s\n", + qp->icid, + iwarp_state_names[prev_iw_state], + iwarp_state_names[qp->iwarp_state], + internal ? "internal" : ""); + + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock); + + if (modify_fw) + rc = qed_iwarp_modify_fw(p_hwfn, qp); + + return rc; +} + +int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_DESTROY_QP, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + return rc; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc); + + return rc; +} + +static void qed_iwarp_destroy_ep(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, + bool remove_from_active_list) +{ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*ep->ep_buffer_virt), + ep->ep_buffer_virt, ep->ep_buffer_phys); + + if (remove_from_active_list) { + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_del(&ep->list_entry); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + + if (ep->qp) + ep->qp->ep = NULL; + + kfree(ep); +} + +int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + struct qed_iwarp_ep *ep = qp->ep; + int wait_count = 0; + int rc = 0; + + if (qp->iwarp_state != QED_IWARP_QP_STATE_ERROR) { + rc = qed_iwarp_modify_qp(p_hwfn, qp, + QED_IWARP_QP_STATE_ERROR, false); + if (rc) + return rc; + } + + /* Make sure ep is closed before returning and freeing memory. */ + if (ep) { + while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200) + msleep(100); + + if (ep->state != QED_IWARP_EP_CLOSED) + DP_NOTICE(p_hwfn, "ep state close timeout state=%x\n", + ep->state); + + qed_iwarp_destroy_ep(p_hwfn, ep, false); + } + + rc = qed_iwarp_fw_destroy(p_hwfn, qp); + + if (qp->shared_queue) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + IWARP_SHARED_QUEUE_PAGE_SIZE, + qp->shared_queue, qp->shared_queue_phys_addr); + + return rc; +} + +static int +qed_iwarp_create_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep **ep_out) +{ + struct qed_iwarp_ep *ep; + int rc; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + ep->state = QED_IWARP_EP_INIT; + + ep->ep_buffer_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*ep->ep_buffer_virt), + &ep->ep_buffer_phys, + GFP_KERNEL); + if (!ep->ep_buffer_virt) { + rc = -ENOMEM; + goto err; + } + + ep->sig = QED_EP_SIG; + + *ep_out = ep; + + return 0; + +err: + kfree(ep); + return rc; +} + +static void +qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "local_mac=%x %x %x, remote_mac=%x %x %x\n", + p_tcp_ramrod->tcp.local_mac_addr_lo, + p_tcp_ramrod->tcp.local_mac_addr_mid, + p_tcp_ramrod->tcp.local_mac_addr_hi, + p_tcp_ramrod->tcp.remote_mac_addr_lo, + p_tcp_ramrod->tcp.remote_mac_addr_mid, + p_tcp_ramrod->tcp.remote_mac_addr_hi); + + if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "local_ip=%pI4h:%x, remote_ip=%pI4h%x, vlan=%x\n", + p_tcp_ramrod->tcp.local_ip, + p_tcp_ramrod->tcp.local_port, + p_tcp_ramrod->tcp.remote_ip, + p_tcp_ramrod->tcp.remote_port, + p_tcp_ramrod->tcp.vlan_id); + } else { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "local_ip=%pI6h:%x, remote_ip=%pI6h:%x, vlan=%x\n", + p_tcp_ramrod->tcp.local_ip, + p_tcp_ramrod->tcp.local_port, + p_tcp_ramrod->tcp.remote_ip, + p_tcp_ramrod->tcp.remote_port, + p_tcp_ramrod->tcp.vlan_id); + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "flow_label=%x, ttl=%x, tos_or_tc=%x, mss=%x, rcv_wnd_scale=%x, connect_mode=%x, flags=%x\n", + p_tcp_ramrod->tcp.flow_label, + p_tcp_ramrod->tcp.ttl, + p_tcp_ramrod->tcp.tos_or_tc, + p_tcp_ramrod->tcp.mss, + p_tcp_ramrod->tcp.rcv_wnd_scale, + p_tcp_ramrod->tcp.connect_mode, + p_tcp_ramrod->tcp.flags); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "syn_ip_payload_length=%x, lo=%x, hi=%x\n", + p_tcp_ramrod->tcp.syn_ip_payload_length, + p_tcp_ramrod->tcp.syn_phy_addr_lo, + p_tcp_ramrod->tcp.syn_phy_addr_hi); +} + +static int +qed_iwarp_tcp_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod; + struct tcp_offload_params_opt2 *tcp; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t async_output_phys; + dma_addr_t in_pdata_phys; + u16 physical_q; + u8 tcp_flags; + int rc; + int i; + + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = ep->tcp_cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + if (ep->connect_mode == TCP_CONNECT_PASSIVE) + init_data.comp_mode = QED_SPQ_MODE_CB; + else + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_TCP_OFFLOAD, + PROTOCOLID_IWARP, &init_data); + if (rc) + return rc; + + p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload; + + in_pdata_phys = ep->ep_buffer_phys + + offsetof(struct qed_iwarp_ep_memory, in_pdata); + DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr, + in_pdata_phys); + + p_tcp_ramrod->iwarp.incoming_ulp_buffer.len = + cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata)); + + async_output_phys = ep->ep_buffer_phys + + offsetof(struct qed_iwarp_ep_memory, async_output); + DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.async_eqe_output_buf, + async_output_phys); + + p_tcp_ramrod->iwarp.handle_for_async.hi = cpu_to_le32(PTR_HI(ep)); + p_tcp_ramrod->iwarp.handle_for_async.lo = cpu_to_le32(PTR_LO(ep)); + + physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD); + p_tcp_ramrod->iwarp.physical_q0 = cpu_to_le16(physical_q); + physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK); + p_tcp_ramrod->iwarp.physical_q1 = cpu_to_le16(physical_q); + p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev; + + tcp = &p_tcp_ramrod->tcp; + qed_set_fw_mac_addr(&tcp->remote_mac_addr_hi, + &tcp->remote_mac_addr_mid, + &tcp->remote_mac_addr_lo, ep->remote_mac_addr); + qed_set_fw_mac_addr(&tcp->local_mac_addr_hi, &tcp->local_mac_addr_mid, + &tcp->local_mac_addr_lo, ep->local_mac_addr); + + tcp->vlan_id = cpu_to_le16(ep->cm_info.vlan); + + tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags; + tcp->flags = 0; + SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_TS_EN, + !!(tcp_flags & QED_IWARP_TS_EN)); + + SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_DA_EN, + !!(tcp_flags & QED_IWARP_DA_EN)); + + tcp->ip_version = ep->cm_info.ip_version; + + for (i = 0; i < 4; i++) { + tcp->remote_ip[i] = cpu_to_le32(ep->cm_info.remote_ip[i]); + tcp->local_ip[i] = cpu_to_le32(ep->cm_info.local_ip[i]); + } + + tcp->remote_port = cpu_to_le16(ep->cm_info.remote_port); + tcp->local_port = cpu_to_le16(ep->cm_info.local_port); + tcp->mss = cpu_to_le16(ep->mss); + tcp->flow_label = 0; + tcp->ttl = 0x40; + tcp->tos_or_tc = 0; + + tcp->rcv_wnd_scale = (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale; + tcp->connect_mode = ep->connect_mode; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + tcp->syn_ip_payload_length = + cpu_to_le16(ep->syn_ip_payload_length); + tcp->syn_phy_addr_hi = DMA_HI_LE(ep->syn_phy_addr); + tcp->syn_phy_addr_lo = DMA_LO_LE(ep->syn_phy_addr); + } + + qed_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "EP(0x%x) Offload completed rc=%d\n", ep->tcp_cid, rc); + + return rc; +} + +static void +qed_iwarp_mpa_received(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct qed_iwarp_cm_event_params params; + struct mpa_v2_hdr *mpa_v2; + union async_output *async_data; + u16 mpa_ord, mpa_ird; + u8 mpa_hdr_size = 0; + u8 mpa_rev; + + async_data = &ep->ep_buffer_virt->async_output; + + mpa_rev = async_data->mpa_request.mpa_handshake_mode; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "private_data_len=%x handshake_mode=%x private_data=(%x)\n", + async_data->mpa_request.ulp_data_len, + mpa_rev, *((u32 *)(ep->ep_buffer_virt->in_pdata))); + + if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) { + /* Read ord/ird values from private data buffer */ + mpa_v2 = (struct mpa_v2_hdr *)ep->ep_buffer_virt->in_pdata; + mpa_hdr_size = sizeof(*mpa_v2); + + mpa_ord = ntohs(mpa_v2->ord); + mpa_ird = ntohs(mpa_v2->ird); + + /* Temprary store in cm_info incoming ord/ird requested, later + * replace with negotiated value during accept + */ + ep->cm_info.ord = (u8)min_t(u16, + (mpa_ord & MPA_V2_IRD_ORD_MASK), + QED_IWARP_ORD_DEFAULT); + + ep->cm_info.ird = (u8)min_t(u16, + (mpa_ird & MPA_V2_IRD_ORD_MASK), + QED_IWARP_IRD_DEFAULT); + + /* Peer2Peer negotiation */ + ep->rtr_type = MPA_RTR_TYPE_NONE; + if (mpa_ird & MPA_V2_PEER2PEER_MODEL) { + if (mpa_ord & MPA_V2_WRITE_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE; + + if (mpa_ord & MPA_V2_READ_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ; + + if (mpa_ird & MPA_V2_SEND_RTR) + ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND; + + ep->rtr_type &= iwarp_info->rtr_type; + + /* if we're left with no match send our capabilities */ + if (ep->rtr_type == MPA_RTR_TYPE_NONE) + ep->rtr_type = iwarp_info->rtr_type; + } + + ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED; + } else { + ep->cm_info.ord = QED_IWARP_ORD_DEFAULT; + ep->cm_info.ird = QED_IWARP_IRD_DEFAULT; + ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n", + mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type, + async_data->mpa_request.ulp_data_len, mpa_hdr_size); + + /* Strip mpa v2 hdr from private data before sending to upper layer */ + ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_hdr_size; + + ep->cm_info.private_data_len = async_data->mpa_request.ulp_data_len - + mpa_hdr_size; + + params.event = QED_IWARP_EVENT_MPA_REQUEST; + params.cm_info = &ep->cm_info; + params.ep_context = ep; + params.status = 0; + + ep->state = QED_IWARP_EP_MPA_REQ_RCVD; + ep->event_cb(ep->cb_context, ¶ms); +} + +static int +qed_iwarp_mpa_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod; + struct qed_sp_init_data init_data; + dma_addr_t async_output_phys; + struct qed_spq_entry *p_ent; + dma_addr_t out_pdata_phys; + dma_addr_t in_pdata_phys; + struct qed_rdma_qp *qp; + bool reject; + int rc; + + if (!ep) + return -EINVAL; + + qp = ep->qp; + reject = !qp; + + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = reject ? ep->tcp_cid : qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE) + init_data.comp_mode = QED_SPQ_MODE_CB; + else + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MPA_OFFLOAD, + PROTOCOLID_IWARP, &init_data); + if (rc) + return rc; + + p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload; + out_pdata_phys = ep->ep_buffer_phys + + offsetof(struct qed_iwarp_ep_memory, out_pdata); + DMA_REGPAIR_LE(p_mpa_ramrod->common.outgoing_ulp_buffer.addr, + out_pdata_phys); + p_mpa_ramrod->common.outgoing_ulp_buffer.len = + ep->cm_info.private_data_len; + p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed; + + p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord; + p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird; + + p_mpa_ramrod->tcp_cid = p_hwfn->hw_info.opaque_fid << 16 | ep->tcp_cid; + + in_pdata_phys = ep->ep_buffer_phys + + offsetof(struct qed_iwarp_ep_memory, in_pdata); + p_mpa_ramrod->tcp_connect_side = ep->connect_mode; + DMA_REGPAIR_LE(p_mpa_ramrod->incoming_ulp_buffer.addr, + in_pdata_phys); + p_mpa_ramrod->incoming_ulp_buffer.len = + cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata)); + async_output_phys = ep->ep_buffer_phys + + offsetof(struct qed_iwarp_ep_memory, async_output); + DMA_REGPAIR_LE(p_mpa_ramrod->async_eqe_output_buf, + async_output_phys); + p_mpa_ramrod->handle_for_async.hi = cpu_to_le32(PTR_HI(ep)); + p_mpa_ramrod->handle_for_async.lo = cpu_to_le32(PTR_LO(ep)); + + if (!reject) { + DMA_REGPAIR_LE(p_mpa_ramrod->shared_queue_addr, + qp->shared_queue_phys_addr); + p_mpa_ramrod->stats_counter_id = + RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + qp->stats_queue; + } else { + p_mpa_ramrod->common.reject = 1; + } + + p_mpa_ramrod->mode = ep->mpa_rev; + SET_FIELD(p_mpa_ramrod->rtr_pref, + IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, ep->rtr_type); + + ep->state = QED_IWARP_EP_MPA_OFFLOADED; + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (!reject) + ep->cid = qp->icid; /* Now they're migrated. */ + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n", + reject ? 0xffff : qp->icid, + ep->tcp_cid, + rc, + ep->cm_info.ird, + ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject); + return rc; +} + +static void +qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + ep->state = QED_IWARP_EP_INIT; + if (ep->qp) + ep->qp->ep = NULL; + ep->qp = NULL; + memset(&ep->cm_info, 0, sizeof(ep->cm_info)); + + if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) { + /* We don't care about the return code, it's ok if tcp_cid + * remains invalid...in this case we'll defer allocation + */ + qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid); + } + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + list_del(&ep->list_entry); + list_add_tail(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); +} + +void +qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + struct mpa_v2_hdr *mpa_v2_params; + union async_output *async_data; + u16 mpa_ird, mpa_ord; + u8 mpa_data_size = 0; + + if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) { + mpa_v2_params = + (struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata); + mpa_data_size = sizeof(*mpa_v2_params); + mpa_ird = ntohs(mpa_v2_params->ird); + mpa_ord = ntohs(mpa_v2_params->ord); + + ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK); + ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK); + } + async_data = &ep->ep_buffer_virt->async_output; + + ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size; + ep->cm_info.private_data_len = async_data->mpa_response.ulp_data_len - + mpa_data_size; +} + +void +qed_iwarp_mpa_reply_arrived(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + struct qed_iwarp_cm_event_params params; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + DP_NOTICE(p_hwfn, + "MPA reply event not expected on passive side!\n"); + return; + } + + params.event = QED_IWARP_EVENT_ACTIVE_MPA_REPLY; + + qed_iwarp_parse_private_data(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n", + ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird); + + params.cm_info = &ep->cm_info; + params.ep_context = ep; + params.status = 0; + + ep->mpa_reply_processed = true; + + ep->event_cb(ep->cb_context, ¶ms); +} + +#define QED_IWARP_CONNECT_MODE_STRING(ep) \ + ((ep)->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active" + +/* Called as a result of the event: + * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE + */ +static void +qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, u8 fw_return_code) +{ + struct qed_iwarp_cm_event_params params; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE) + params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE; + else + params.event = QED_IWARP_EVENT_PASSIVE_COMPLETE; + + if (ep->connect_mode == TCP_CONNECT_ACTIVE && !ep->mpa_reply_processed) + qed_iwarp_parse_private_data(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n", + ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird); + + params.cm_info = &ep->cm_info; + + params.ep_context = ep; + + ep->state = QED_IWARP_EP_CLOSED; + + switch (fw_return_code) { + case RDMA_RETURN_OK: + ep->qp->max_rd_atomic_req = ep->cm_info.ord; + ep->qp->max_rd_atomic_resp = ep->cm_info.ird; + qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_RTS, 1); + ep->state = QED_IWARP_EP_ESTABLISHED; + params.status = 0; + break; + case IWARP_CONN_ERROR_MPA_TIMEOUT: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA timeout\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -EBUSY; + break; + case IWARP_CONN_ERROR_MPA_ERROR_REJECT: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA Reject\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_RST: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid, + ep->tcp_cid); + params.status = -ECONNRESET; + break; + case IWARP_CONN_ERROR_MPA_FIN: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA received FIN\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_INSUF_IRD: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA insufficient ird\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_RTR_MISMATCH: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA RTR MISMATCH\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_INVALID_PACKET: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_LOCAL_ERROR: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA Local Error\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_TERMINATE: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA TERMINATE\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid); + params.status = -ECONNREFUSED; + break; + default: + params.status = -ECONNRESET; + break; + } + + ep->event_cb(ep->cb_context, ¶ms); + + /* on passive side, if there is no associated QP (REJECT) we need to + * return the ep to the pool, (in the regular case we add an element + * in accept instead of this one. + * In both cases we need to remove it from the ep_list. + */ + if (fw_return_code != RDMA_RETURN_OK) { + ep->tcp_cid = QED_IWARP_INVALID_TCP_CID; + if ((ep->connect_mode == TCP_CONNECT_PASSIVE) && + (!ep->qp)) { /* Rejected */ + qed_iwarp_return_ep(p_hwfn, ep); + } else { + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_del(&ep->list_entry); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + } +} + +static void +qed_iwarp_mpa_v2_set_private(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, u8 *mpa_data_size) +{ + struct mpa_v2_hdr *mpa_v2_params; + u16 mpa_ird, mpa_ord; + + *mpa_data_size = 0; + if (MPA_REV2(ep->mpa_rev)) { + mpa_v2_params = + (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata; + *mpa_data_size = sizeof(*mpa_v2_params); + + mpa_ird = (u16)ep->cm_info.ird; + mpa_ord = (u16)ep->cm_info.ord; + + if (ep->rtr_type != MPA_RTR_TYPE_NONE) { + mpa_ird |= MPA_V2_PEER2PEER_MODEL; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND) + mpa_ird |= MPA_V2_SEND_RTR; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE) + mpa_ord |= MPA_V2_WRITE_RTR; + + if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) + mpa_ord |= MPA_V2_READ_RTR; + } + + mpa_v2_params->ird = htons(mpa_ird); + mpa_v2_params->ord = htons(mpa_ord); + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n", + mpa_v2_params->ird, + mpa_v2_params->ord, + *((u32 *)mpa_v2_params), + mpa_ord & MPA_V2_IRD_ORD_MASK, + mpa_ird & MPA_V2_IRD_ORD_MASK, + !!(mpa_ird & MPA_V2_PEER2PEER_MODEL), + !!(mpa_ird & MPA_V2_SEND_RTR), + !!(mpa_ord & MPA_V2_WRITE_RTR), + !!(mpa_ord & MPA_V2_READ_RTR)); + } +} + +int qed_iwarp_connect(void *rdma_cxt, + struct qed_iwarp_connect_in *iparams, + struct qed_iwarp_connect_out *oparams) +{ + struct qed_hwfn *p_hwfn = rdma_cxt; + struct qed_iwarp_info *iwarp_info; + struct qed_iwarp_ep *ep; + u8 mpa_data_size = 0; + u8 ts_hdr_size = 0; + u32 cid; + int rc; + + if ((iparams->cm_info.ord > QED_IWARP_ORD_DEFAULT) || + (iparams->cm_info.ird > QED_IWARP_IRD_DEFAULT)) { + DP_NOTICE(p_hwfn, + "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n", + iparams->qp->icid, iparams->cm_info.ord, + iparams->cm_info.ird); + + return -EINVAL; + } + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + + /* Allocate ep object */ + rc = qed_iwarp_alloc_cid(p_hwfn, &cid); + if (rc) + return rc; + + rc = qed_iwarp_create_ep(p_hwfn, &ep); + if (rc) + goto err; + + ep->tcp_cid = cid; + + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep->qp = iparams->qp; + ep->qp->ep = ep; + ether_addr_copy(ep->remote_mac_addr, iparams->remote_mac_addr); + ether_addr_copy(ep->local_mac_addr, iparams->local_mac_addr); + memcpy(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info)); + + ep->cm_info.ord = iparams->cm_info.ord; + ep->cm_info.ird = iparams->cm_info.ird; + + ep->rtr_type = iwarp_info->rtr_type; + if (!iwarp_info->peer2peer) + ep->rtr_type = MPA_RTR_TYPE_NONE; + + if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && (ep->cm_info.ord == 0)) + ep->cm_info.ord = 1; + + ep->mpa_rev = iwarp_info->mpa_rev; + + qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = iparams->cm_info.private_data_len + + mpa_data_size; + + memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->cm_info.private_data, + iparams->cm_info.private_data_len); + + if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN) + ts_hdr_size = TIMESTAMP_HEADER_SIZE; + + ep->mss = iparams->mss - ts_hdr_size; + ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss); + + ep->event_cb = iparams->event_cb; + ep->cb_context = iparams->cb_context; + ep->connect_mode = TCP_CONNECT_ACTIVE; + + oparams->ep_context = ep; + + rc = qed_iwarp_tcp_offload(p_hwfn, ep); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n", + iparams->qp->icid, ep->tcp_cid, rc); + + if (rc) { + qed_iwarp_destroy_ep(p_hwfn, ep, true); + goto err; + } + + return rc; +err: + qed_iwarp_cid_cleaned(p_hwfn, cid); + + return rc; +} + +static struct qed_iwarp_ep *qed_iwarp_get_free_ep(struct qed_hwfn *p_hwfn) +{ + struct qed_iwarp_ep *ep = NULL; + int rc; + + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + if (list_empty(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) { + DP_ERR(p_hwfn, "Ep list is empty\n"); + goto out; + } + + ep = list_first_entry(&p_hwfn->p_rdma_info->iwarp.ep_free_list, + struct qed_iwarp_ep, list_entry); + + /* in some cases we could have failed allocating a tcp cid when added + * from accept / failure... retry now..this is not the common case. + */ + if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) { + rc = qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid); + + /* if we fail we could look for another entry with a valid + * tcp_cid, but since we don't expect to reach this anyway + * it's not worth the handling + */ + if (rc) { + ep->tcp_cid = QED_IWARP_INVALID_TCP_CID; + ep = NULL; + goto out; + } + } + + list_del(&ep->list_entry); + +out: + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + return ep; +} + +#define QED_IWARP_MAX_CID_CLEAN_TIME 100 +#define QED_IWARP_MAX_NO_PROGRESS_CNT 5 + +/* This function waits for all the bits of a bmap to be cleared, as long as + * there is progress ( i.e. the number of bits left to be cleared decreases ) + * the function continues. + */ +static int +qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap) +{ + int prev_weight = 0; + int wait_count = 0; + int weight = 0; + + weight = bitmap_weight(bmap->bitmap, bmap->max_count); + prev_weight = weight; + + while (weight) { + msleep(QED_IWARP_MAX_CID_CLEAN_TIME); + + weight = bitmap_weight(bmap->bitmap, bmap->max_count); + + if (prev_weight == weight) { + wait_count++; + } else { + prev_weight = weight; + wait_count = 0; + } + + if (wait_count > QED_IWARP_MAX_NO_PROGRESS_CNT) { + DP_NOTICE(p_hwfn, + "%s bitmap wait timed out (%d cids pending)\n", + bmap->name, weight); + return -EBUSY; + } + } + return 0; +} + +static int qed_iwarp_wait_for_all_cids(struct qed_hwfn *p_hwfn) +{ + int rc; + int i; + + rc = qed_iwarp_wait_cid_map_cleared(p_hwfn, + &p_hwfn->p_rdma_info->tcp_cid_map); + if (rc) + return rc; + + /* Now free the tcp cids from the main cid map */ + for (i = 0; i < QED_IWARP_PREALLOC_CNT; i++) + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, i); + + /* Now wait for all cids to be completed */ + return qed_iwarp_wait_cid_map_cleared(p_hwfn, + &p_hwfn->p_rdma_info->cid_map); +} + +static void qed_iwarp_free_prealloc_ep(struct qed_hwfn *p_hwfn) +{ + struct qed_iwarp_ep *ep; + + while (!list_empty(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) { + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep = list_first_entry(&p_hwfn->p_rdma_info->iwarp.ep_free_list, + struct qed_iwarp_ep, list_entry); + + if (!ep) { + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + break; + } + list_del(&ep->list_entry); + + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + if (ep->tcp_cid != QED_IWARP_INVALID_TCP_CID) + qed_iwarp_cid_cleaned(p_hwfn, ep->tcp_cid); + + qed_iwarp_destroy_ep(p_hwfn, ep, false); + } +} + +static int qed_iwarp_prealloc_ep(struct qed_hwfn *p_hwfn, bool init) +{ + struct qed_iwarp_ep *ep; + int rc = 0; + int count; + u32 cid; + int i; + + count = init ? QED_IWARP_PREALLOC_CNT : 1; + for (i = 0; i < count; i++) { + rc = qed_iwarp_create_ep(p_hwfn, &ep); + if (rc) + return rc; + + /* During initialization we allocate from the main pool, + * afterwards we allocate only from the tcp_cid. + */ + if (init) { + rc = qed_iwarp_alloc_cid(p_hwfn, &cid); + if (rc) + goto err; + qed_iwarp_set_tcp_cid(p_hwfn, cid); + } else { + /* We don't care about the return code, it's ok if + * tcp_cid remains invalid...in this case we'll + * defer allocation + */ + qed_iwarp_alloc_tcp_cid(p_hwfn, &cid); + } + + ep->tcp_cid = cid; + + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_add_tail(&ep->list_entry, + &p_hwfn->p_rdma_info->iwarp.ep_free_list); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } + + return rc; + +err: + qed_iwarp_destroy_ep(p_hwfn, ep, false); + + return rc; +} + +int qed_iwarp_alloc(struct qed_hwfn *p_hwfn) +{ + int rc; + + /* Allocate bitmap for tcp cid. These are used by passive side + * to ensure it can allocate a tcp cid during dpc that was + * pre-acquired and doesn't require dynamic allocation of ilt + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, + QED_IWARP_PREALLOC_CNT, "TCP_CID"); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate tcp cid, rc = %d\n", rc); + return rc; + } + + INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list); + spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + return qed_iwarp_prealloc_ep(p_hwfn, true); +} + +void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn) +{ + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); +} + +int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams) +{ + struct qed_hwfn *p_hwfn = rdma_cxt; + struct qed_iwarp_ep *ep; + u8 mpa_data_size = 0; + int rc; + + ep = iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n", + iparams->qp->icid, ep->tcp_cid); + + if ((iparams->ord > QED_IWARP_ORD_DEFAULT) || + (iparams->ird > QED_IWARP_IRD_DEFAULT)) { + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n", + iparams->qp->icid, + ep->tcp_cid, iparams->ord, iparams->ord); + return -EINVAL; + } + + qed_iwarp_prealloc_ep(p_hwfn, false); + + ep->cb_context = iparams->cb_context; + ep->qp = iparams->qp; + ep->qp->ep = ep; + + if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) { + /* Negotiate ord/ird: if upperlayer requested ord larger than + * ird advertised by remote, we need to decrease our ord + */ + if (iparams->ord > ep->cm_info.ird) + iparams->ord = ep->cm_info.ird; + + if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && + (iparams->ird == 0)) + iparams->ird = 1; + } + + /* Update cm_info ord/ird to be negotiated values */ + ep->cm_info.ord = iparams->ord; + ep->cm_info.ird = iparams->ird; + + qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = iparams->private_data_len + + mpa_data_size; + + memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->private_data, iparams->private_data_len); + + rc = qed_iwarp_mpa_offload(p_hwfn, ep); + if (rc) + qed_iwarp_modify_qp(p_hwfn, + iparams->qp, QED_IWARP_QP_STATE_ERROR, 1); + + return rc; +} + +int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams) +{ + struct qed_hwfn *p_hwfn = rdma_cxt; + struct qed_iwarp_ep *ep; + u8 mpa_data_size = 0; + + ep = iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid); + + ep->cb_context = iparams->cb_context; + ep->qp = NULL; + + qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size); + + ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata; + ep->cm_info.private_data_len = iparams->private_data_len + + mpa_data_size; + + memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size, + iparams->private_data, iparams->private_data_len); + + return qed_iwarp_mpa_offload(p_hwfn, ep); +} + +static void +qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn, + struct qed_iwarp_cm_info *cm_info) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "ip_version = %d\n", + cm_info->ip_version); + + if (cm_info->ip_version == QED_TCP_IPV4) + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "remote_ip %pI4h:%x, local_ip %pI4h:%x vlan=%x\n", + cm_info->remote_ip, cm_info->remote_port, + cm_info->local_ip, cm_info->local_port, + cm_info->vlan); + else + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n", + cm_info->remote_ip, cm_info->remote_port, + cm_info->local_ip, cm_info->local_port, + cm_info->vlan); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "private_data_len = %x ord = %d, ird = %d\n", + cm_info->private_data_len, cm_info->ord, cm_info->ird); +} + +static int +qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ll2_buff *buf, u8 handle) +{ + int rc; + + rc = qed_ll2_post_rx_buffer(p_hwfn, handle, buf->data_phys_addr, + (u16)buf->buff_size, buf, 1); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed to repost rx buffer to ll2 rc = %d, handle=%d\n", + rc, handle); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, buf->buff_size, + buf->data, buf->data_phys_addr); + kfree(buf); + } + + return rc; +} + +static bool +qed_iwarp_ep_exists(struct qed_hwfn *p_hwfn, struct qed_iwarp_cm_info *cm_info) +{ + struct qed_iwarp_ep *ep = NULL; + bool found = false; + + list_for_each_entry(ep, + &p_hwfn->p_rdma_info->iwarp.ep_list, + list_entry) { + if ((ep->cm_info.local_port == cm_info->local_port) && + (ep->cm_info.remote_port == cm_info->remote_port) && + (ep->cm_info.vlan == cm_info->vlan) && + !memcmp(&ep->cm_info.local_ip, cm_info->local_ip, + sizeof(cm_info->local_ip)) && + !memcmp(&ep->cm_info.remote_ip, cm_info->remote_ip, + sizeof(cm_info->remote_ip))) { + found = true; + break; + } + } + + if (found) { + DP_NOTICE(p_hwfn, + "SYN received on active connection - dropping\n"); + qed_iwarp_print_cm_info(p_hwfn, cm_info); + + return true; + } + + return false; +} + +static struct qed_iwarp_listener * +qed_iwarp_get_listener(struct qed_hwfn *p_hwfn, + struct qed_iwarp_cm_info *cm_info) +{ + struct qed_iwarp_listener *listener = NULL; + static const u32 ip_zero[4] = { 0, 0, 0, 0 }; + bool found = false; + + qed_iwarp_print_cm_info(p_hwfn, cm_info); + + list_for_each_entry(listener, + &p_hwfn->p_rdma_info->iwarp.listen_list, + list_entry) { + if (listener->port == cm_info->local_port) { + if (!memcmp(listener->ip_addr, + ip_zero, sizeof(ip_zero))) { + found = true; + break; + } + + if (!memcmp(listener->ip_addr, + cm_info->local_ip, + sizeof(cm_info->local_ip)) && + (listener->vlan == cm_info->vlan)) { + found = true; + break; + } + } + } + + if (found) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener found = %p\n", + listener); + return listener; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener not found\n"); + return NULL; +} + +static int +qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_cm_info *cm_info, + void *buf, + u8 *remote_mac_addr, + u8 *local_mac_addr, + int *payload_len, int *tcp_start_offset) +{ + struct vlan_ethhdr *vethh; + bool vlan_valid = false; + struct ipv6hdr *ip6h; + struct ethhdr *ethh; + struct tcphdr *tcph; + struct iphdr *iph; + int eth_hlen; + int ip_hlen; + int eth_type; + int i; + + ethh = buf; + eth_type = ntohs(ethh->h_proto); + if (eth_type == ETH_P_8021Q) { + vlan_valid = true; + vethh = (struct vlan_ethhdr *)ethh; + cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK; + eth_type = ntohs(vethh->h_vlan_encapsulated_proto); + } + + eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0); + + ether_addr_copy(remote_mac_addr, ethh->h_source); + ether_addr_copy(local_mac_addr, ethh->h_dest); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_type =%d source mac: %pM\n", + eth_type, ethh->h_source); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_hlen=%d destination mac: %pM\n", + eth_hlen, ethh->h_dest); + + iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen); + + if (eth_type == ETH_P_IP) { + cm_info->local_ip[0] = ntohl(iph->daddr); + cm_info->remote_ip[0] = ntohl(iph->saddr); + cm_info->ip_version = TCP_IPV4; + + ip_hlen = (iph->ihl) * sizeof(u32); + *payload_len = ntohs(iph->tot_len) - ip_hlen; + } else if (eth_type == ETH_P_IPV6) { + ip6h = (struct ipv6hdr *)iph; + for (i = 0; i < 4; i++) { + cm_info->local_ip[i] = + ntohl(ip6h->daddr.in6_u.u6_addr32[i]); + cm_info->remote_ip[i] = + ntohl(ip6h->saddr.in6_u.u6_addr32[i]); + } + cm_info->ip_version = TCP_IPV6; + + ip_hlen = sizeof(*ip6h); + *payload_len = ntohs(ip6h->payload_len); + } else { + DP_NOTICE(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type); + return -EINVAL; + } + + tcph = (struct tcphdr *)((u8 *)iph + ip_hlen); + + if (!tcph->syn) { + DP_NOTICE(p_hwfn, + "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n", + iph->ihl, tcph->source, tcph->dest); + return -EINVAL; + } + + cm_info->local_port = ntohs(tcph->dest); + cm_info->remote_port = ntohs(tcph->source); + + qed_iwarp_print_cm_info(p_hwfn, cm_info); + + *tcp_start_offset = eth_hlen + ip_hlen; + + return 0; +} + +static void +qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) +{ + struct qed_iwarp_ll2_buff *buf = data->cookie; + struct qed_iwarp_listener *listener; + struct qed_ll2_tx_pkt_info tx_pkt; + struct qed_iwarp_cm_info cm_info; + struct qed_hwfn *p_hwfn = cxt; + u8 remote_mac_addr[ETH_ALEN]; + u8 local_mac_addr[ETH_ALEN]; + struct qed_iwarp_ep *ep; + int tcp_start_offset; + u8 ts_hdr_size = 0; + u8 ll2_syn_handle; + int payload_len; + u32 hdr_size; + int rc; + + memset(&cm_info, 0, sizeof(cm_info)); + + if (GET_FIELD(data->parse_flags, + PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) && + GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) { + DP_NOTICE(p_hwfn, "Syn packet received with checksum error\n"); + goto err; + } + + rc = qed_iwarp_parse_rx_pkt(p_hwfn, &cm_info, (u8 *)(buf->data) + + data->u.placement_offset, remote_mac_addr, + local_mac_addr, &payload_len, + &tcp_start_offset); + if (rc) + goto err; + + /* Check if there is a listener for this 4-tuple+vlan */ + ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle; + listener = qed_iwarp_get_listener(p_hwfn, &cm_info); + if (!listener) { + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "SYN received on tuple not listened on parse_flags=%d packet len=%d\n", + data->parse_flags, data->length.packet_length); + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.vlan = data->vlan; + + if (GET_FIELD(data->parse_flags, + PARSING_AND_ERR_FLAGS_TAG8021QEXIST)) + SET_FIELD(tx_pkt.bd_flags, + CORE_TX_BD_DATA_VLAN_INSERTION, 1); + + tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2; + tx_pkt.tx_dest = QED_LL2_TX_DEST_LB; + tx_pkt.first_frag = buf->data_phys_addr + + data->u.placement_offset; + tx_pkt.first_frag_len = data->length.packet_length; + tx_pkt.cookie = buf; + + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_syn_handle, + &tx_pkt, true); + + if (rc) { + DP_NOTICE(p_hwfn, + "Can't post SYN back to chip rc=%d\n", rc); + goto err; + } + return; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Received syn on listening port\n"); + /* There may be an open ep on this connection if this is a syn + * retrasnmit... need to make sure there isn't... + */ + if (qed_iwarp_ep_exists(p_hwfn, &cm_info)) + goto err; + + ep = qed_iwarp_get_free_ep(p_hwfn); + if (!ep) + goto err; + + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ether_addr_copy(ep->remote_mac_addr, remote_mac_addr); + ether_addr_copy(ep->local_mac_addr, local_mac_addr); + + memcpy(&ep->cm_info, &cm_info, sizeof(ep->cm_info)); + + if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN) + ts_hdr_size = TIMESTAMP_HEADER_SIZE; + + hdr_size = ((cm_info.ip_version == QED_TCP_IPV4) ? 40 : 60) + + ts_hdr_size; + ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size; + ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss); + + ep->event_cb = listener->event_cb; + ep->cb_context = listener->cb_context; + ep->connect_mode = TCP_CONNECT_PASSIVE; + + ep->syn = buf; + ep->syn_ip_payload_length = (u16)payload_len; + ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset + + tcp_start_offset; + + rc = qed_iwarp_tcp_offload(p_hwfn, ep); + if (rc) { + qed_iwarp_return_ep(p_hwfn, ep); + goto err; + } + + return; +err: + qed_iwarp_ll2_post_rx(p_hwfn, buf, ll2_syn_handle); +} + +static void qed_iwarp_ll2_rel_rx_pkt(void *cxt, u8 connection_handle, + void *cookie, dma_addr_t rx_buf_addr, + bool b_last_packet) +{ + struct qed_iwarp_ll2_buff *buffer = cookie; + struct qed_hwfn *p_hwfn = cxt; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size, + buffer->data, buffer->data_phys_addr); + kfree(buffer); +} + +static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle, + void *cookie, dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + struct qed_iwarp_ll2_buff *buffer = cookie; + struct qed_hwfn *p_hwfn = cxt; + + /* this was originally an rx packet, post it back */ + qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle); +} + +static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, + void *cookie, dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + struct qed_iwarp_ll2_buff *buffer = cookie; + struct qed_hwfn *p_hwfn = cxt; + + if (!buffer) + return; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size, + buffer->data, buffer->data_phys_addr); + + kfree(buffer); +} + +static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + int rc = 0; + + if (iwarp_info->ll2_syn_handle != QED_IWARP_HANDLE_INVAL) { + rc = qed_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_syn_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate syn connection\n"); + + qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_syn_handle); + iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; + } + + qed_llh_remove_mac_filter(p_hwfn, + p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr); + return rc; +} + +static int +qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn, + int num_rx_bufs, int buff_size, u8 ll2_handle) +{ + struct qed_iwarp_ll2_buff *buffer; + int rc = 0; + int i; + + for (i = 0; i < num_rx_bufs; i++) { + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + rc = -ENOMEM; + break; + } + + buffer->data = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + buff_size, + &buffer->data_phys_addr, + GFP_KERNEL); + if (!buffer->data) { + kfree(buffer); + rc = -ENOMEM; + break; + } + + buffer->buff_size = buff_size; + rc = qed_iwarp_ll2_post_rx(p_hwfn, buffer, ll2_handle); + if (rc) + /* buffers will be deallocated by qed_ll2 */ + break; + } + return rc; +} + +#define QED_IWARP_MAX_BUF_SIZE(mtu) \ + ALIGN((mtu) + ETH_HLEN + 2 * VLAN_HLEN + 2 + ETH_CACHE_LINE_SIZE, \ + ETH_CACHE_LINE_SIZE) + +static int +qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params, + struct qed_ptt *p_ptt) +{ + struct qed_iwarp_info *iwarp_info; + struct qed_ll2_acquire_data data; + struct qed_ll2_cbs cbs; + int rc = 0; + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; + + iwarp_info->max_mtu = params->max_mtu; + + ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr); + + rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr); + if (rc) + return rc; + + /* Start SYN connection */ + cbs.rx_comp_cb = qed_iwarp_ll2_comp_syn_pkt; + cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt; + cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt; + cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt; + cbs.cookie = p_hwfn; + + memset(&data, 0, sizeof(data)); + data.input.conn_type = QED_LL2_TYPE_IWARP; + data.input.mtu = QED_IWARP_MAX_SYN_PKT_SIZE; + data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE; + data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE; + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_tc = PKT_LB_TC; + data.input.tx_dest = QED_LL2_TX_DEST_LB; + data.p_connection_handle = &iwarp_info->ll2_syn_handle; + data.cbs = &cbs; + + rc = qed_ll2_acquire_connection(p_hwfn, &data); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n"); + qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr); + return rc; + } + + rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_syn_handle); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to establish LL2 connection\n"); + goto err; + } + + rc = qed_iwarp_ll2_alloc_buffers(p_hwfn, + QED_IWARP_LL2_SYN_RX_SIZE, + QED_IWARP_MAX_SYN_PKT_SIZE, + iwarp_info->ll2_syn_handle); + if (rc) + goto err; + + return rc; +err: + qed_iwarp_ll2_stop(p_hwfn, p_ptt); + + return rc; +} + +int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + struct qed_iwarp_info *iwarp_info; + u32 rcv_wnd_size; + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + + iwarp_info->tcp_flags = QED_IWARP_TS_EN; + rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF; + + /* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */ + iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) - + ilog2(QED_IWARP_RCV_WND_SIZE_MIN); + iwarp_info->crc_needed = QED_IWARP_PARAM_CRC_NEEDED; + iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED; + + iwarp_info->peer2peer = QED_IWARP_PARAM_P2P; + + iwarp_info->rtr_type = MPA_RTR_TYPE_ZERO_SEND | + MPA_RTR_TYPE_ZERO_WRITE | + MPA_RTR_TYPE_ZERO_READ; + + spin_lock_init(&p_hwfn->p_rdma_info->iwarp.qp_lock); + INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_list); + INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.listen_list); + + qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP, + qed_iwarp_async_event); + + return qed_iwarp_ll2_start(p_hwfn, params, p_ptt); +} + +int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + int rc; + + qed_iwarp_free_prealloc_ep(p_hwfn); + rc = qed_iwarp_wait_for_all_cids(p_hwfn); + if (rc) + return rc; + + qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); + + return qed_iwarp_ll2_stop(p_hwfn, p_ptt); +} + +void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, u8 fw_return_code) +{ + struct qed_iwarp_cm_event_params params; + + qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_ERROR, true); + + params.event = QED_IWARP_EVENT_CLOSE; + params.ep_context = ep; + params.cm_info = &ep->cm_info; + params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? + 0 : -ECONNRESET; + + ep->state = QED_IWARP_EP_CLOSED; + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_del(&ep->list_entry); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + ep->event_cb(ep->cb_context, ¶ms); +} + +void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, int fw_ret_code) +{ + struct qed_iwarp_cm_event_params params; + bool event_cb = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n", + ep->cid, fw_ret_code); + + switch (fw_ret_code) { + case IWARP_EXCEPTION_DETECTED_LLP_CLOSED: + params.status = 0; + params.event = QED_IWARP_EVENT_DISCONNECT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LLP_RESET: + params.status = -ECONNRESET; + params.event = QED_IWARP_EVENT_DISCONNECT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_RQ_EMPTY: + params.event = QED_IWARP_EVENT_RQ_EMPTY; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_IRQ_FULL: + params.event = QED_IWARP_EVENT_IRQ_FULL; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT: + params.event = QED_IWARP_EVENT_LLP_TIMEOUT; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR: + params.event = QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW: + params.event = QED_IWARP_EVENT_CQ_OVERFLOW; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC: + params.event = QED_IWARP_EVENT_QP_CATASTROPHIC; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR: + params.event = QED_IWARP_EVENT_LOCAL_ACCESS_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR: + params.event = QED_IWARP_EVENT_REMOTE_OPERATION_ERROR; + event_cb = true; + break; + case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED: + params.event = QED_IWARP_EVENT_TERMINATE_RECEIVED; + event_cb = true; + break; + default: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Unhandled exception received...fw_ret_code=%d\n", + fw_ret_code); + break; + } + + if (event_cb) { + params.ep_context = ep; + params.cm_info = &ep->cm_info; + ep->event_cb(ep->cb_context, ¶ms); + } +} + +static void +qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, u8 fw_return_code) +{ + struct qed_iwarp_cm_event_params params; + + memset(¶ms, 0, sizeof(params)); + params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE; + params.ep_context = ep; + params.cm_info = &ep->cm_info; + ep->state = QED_IWARP_EP_CLOSED; + + switch (fw_return_code) { + case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "%s(0x%x) TCP connect got invalid packet\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = -ECONNRESET; + break; + case IWARP_CONN_ERROR_TCP_CONNECTION_RST: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "%s(0x%x) TCP Connection Reset\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = -ECONNRESET; + break; + case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT: + DP_NOTICE(p_hwfn, "%s(0x%x) TCP timeout\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = -EBUSY; + break; + case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA not supported VER\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = -ECONNREFUSED; + break; + case IWARP_CONN_ERROR_MPA_INVALID_PACKET: + DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n", + QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid); + params.status = -ECONNRESET; + break; + default: + DP_ERR(p_hwfn, + "%s(0x%x) Unexpected return code tcp connect: %d\n", + QED_IWARP_CONNECT_MODE_STRING(ep), + ep->tcp_cid, fw_return_code); + params.status = -ECONNRESET; + break; + } + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + ep->tcp_cid = QED_IWARP_INVALID_TCP_CID; + qed_iwarp_return_ep(p_hwfn, ep); + } else { + ep->event_cb(ep->cb_context, ¶ms); + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_del(&ep->list_entry); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + } +} + +void +qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ep *ep, u8 fw_return_code) +{ + u8 ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle; + + if (ep->connect_mode == TCP_CONNECT_PASSIVE) { + /* Done with the SYN packet, post back to ll2 rx */ + qed_iwarp_ll2_post_rx(p_hwfn, ep->syn, ll2_syn_handle); + + ep->syn = NULL; + + /* If connect failed - upper layer doesn't know about it */ + if (fw_return_code == RDMA_RETURN_OK) + qed_iwarp_mpa_received(p_hwfn, ep); + else + qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, + fw_return_code); + } else { + if (fw_return_code == RDMA_RETURN_OK) + qed_iwarp_mpa_offload(p_hwfn, ep); + else + qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, + fw_return_code); + } +} + +static inline bool +qed_iwarp_check_ep_ok(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep) +{ + if (!ep || (ep->sig != QED_EP_SIG)) { + DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep); + return false; + } + + return true; +} + +static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn, + u8 fw_event_code, u16 echo, + union event_ring_data *data, + u8 fw_return_code) +{ + struct regpair *fw_handle = &data->rdma_data.async_handle; + struct qed_iwarp_ep *ep = NULL; + u16 cid; + + ep = (struct qed_iwarp_ep *)(uintptr_t)HILO_64(fw_handle->hi, + fw_handle->lo); + + switch (fw_event_code) { + case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE: + /* Async completion after TCP 3-way handshake */ + if (!qed_iwarp_check_ep_ok(p_hwfn, ep)) + return -EINVAL; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n", + ep->tcp_cid, fw_return_code); + qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED: + if (!qed_iwarp_check_ep_ok(p_hwfn, ep)) + return -EINVAL; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n", + ep->cid, fw_return_code); + qed_iwarp_exception_received(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE: + /* Async completion for Close Connection ramrod */ + if (!qed_iwarp_check_ep_ok(p_hwfn, ep)) + return -EINVAL; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n", + ep->cid, fw_return_code); + qed_iwarp_qp_in_error(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED: + /* Async event for active side only */ + if (!qed_iwarp_check_ep_ok(p_hwfn, ep)) + return -EINVAL; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n", + ep->cid, fw_return_code); + qed_iwarp_mpa_reply_arrived(p_hwfn, ep); + break; + case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE: + if (!qed_iwarp_check_ep_ok(p_hwfn, ep)) + return -EINVAL; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n", + ep->cid, fw_return_code); + qed_iwarp_mpa_complete(p_hwfn, ep, fw_return_code); + break; + case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED: + cid = (u16)le32_to_cpu(fw_handle->lo); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", cid); + qed_iwarp_cid_cleaned(p_hwfn, cid); + + break; + case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW: + DP_NOTICE(p_hwfn, "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n"); + + p_hwfn->p_rdma_info->events.affiliated_event( + p_hwfn->p_rdma_info->events.context, + QED_IWARP_EVENT_CQ_OVERFLOW, + (void *)fw_handle); + break; + default: + DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n", + fw_event_code); + return -EINVAL; + } + return 0; +} + +int +qed_iwarp_create_listen(void *rdma_cxt, + struct qed_iwarp_listen_in *iparams, + struct qed_iwarp_listen_out *oparams) +{ + struct qed_hwfn *p_hwfn = rdma_cxt; + struct qed_iwarp_listener *listener; + + listener = kzalloc(sizeof(*listener), GFP_KERNEL); + if (!listener) + return -ENOMEM; + + listener->ip_version = iparams->ip_version; + memcpy(listener->ip_addr, iparams->ip_addr, sizeof(listener->ip_addr)); + listener->port = iparams->port; + listener->vlan = iparams->vlan; + + listener->event_cb = iparams->event_cb; + listener->cb_context = iparams->cb_context; + listener->max_backlog = iparams->max_backlog; + oparams->handle = listener; + + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_add_tail(&listener->list_entry, + &p_hwfn->p_rdma_info->iwarp.listen_list); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n", + listener->event_cb, + listener, + listener->ip_addr[0], + listener->ip_addr[1], + listener->ip_addr[2], + listener->ip_addr[3], listener->port, listener->vlan); + + return 0; +} + +int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle) +{ + struct qed_iwarp_listener *listener = handle; + struct qed_hwfn *p_hwfn = rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "handle=%p\n", handle); + + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + list_del(&listener->list_entry); + spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); + + kfree(listener); + + return 0; +} + +int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams) +{ + struct qed_hwfn *p_hwfn = rdma_cxt; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_iwarp_ep *ep; + struct qed_rdma_qp *qp; + int rc; + + ep = iparams->ep_context; + if (!ep) { + DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n"); + return -EINVAL; + } + + qp = ep->qp; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n", + qp->icid, ep->tcp_cid); + + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_CB; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR, + PROTOCOLID_IWARP, &init_data); + + if (rc) + return rc; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = 0x%x\n", rc); + + return rc; +} + +void +qed_iwarp_query_qp(struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + out_params->state = qed_iwarp2roce_state(qp->iwarp_state); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h new file mode 100644 index 000000000000..148ef3c33a5d --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -0,0 +1,189 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_IWARP_H +#define _QED_IWARP_H + +enum qed_iwarp_qp_state { + QED_IWARP_QP_STATE_IDLE, + QED_IWARP_QP_STATE_RTS, + QED_IWARP_QP_STATE_TERMINATE, + QED_IWARP_QP_STATE_CLOSING, + QED_IWARP_QP_STATE_ERROR, +}; + +enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state); + +#define QED_IWARP_PREALLOC_CNT (256) + +#define QED_IWARP_LL2_SYN_TX_SIZE (128) +#define QED_IWARP_LL2_SYN_RX_SIZE (256) +#define QED_IWARP_MAX_SYN_PKT_SIZE (128) +#define QED_IWARP_HANDLE_INVAL (0xff) + +struct qed_iwarp_ll2_buff { + void *data; + dma_addr_t data_phys_addr; + u32 buff_size; +}; + +struct qed_iwarp_info { + struct list_head listen_list; /* qed_iwarp_listener */ + struct list_head ep_list; /* qed_iwarp_ep */ + struct list_head ep_free_list; /* pre-allocated ep's */ + spinlock_t iw_lock; /* for iwarp resources */ + spinlock_t qp_lock; /* for teardown races */ + u32 rcv_wnd_scale; + u16 max_mtu; + u8 mac_addr[ETH_ALEN]; + u8 crc_needed; + u8 tcp_flags; + u8 ll2_syn_handle; + u8 peer2peer; + enum mpa_negotiation_mode mpa_rev; + enum mpa_rtr_type rtr_type; +}; + +enum qed_iwarp_ep_state { + QED_IWARP_EP_INIT, + QED_IWARP_EP_MPA_REQ_RCVD, + QED_IWARP_EP_MPA_OFFLOADED, + QED_IWARP_EP_ESTABLISHED, + QED_IWARP_EP_CLOSED +}; + +union async_output { + struct iwarp_eqe_data_mpa_async_completion mpa_response; + struct iwarp_eqe_data_tcp_async_completion mpa_request; +}; + +#define QED_MAX_PRIV_DATA_LEN (512) +struct qed_iwarp_ep_memory { + u8 in_pdata[QED_MAX_PRIV_DATA_LEN]; + u8 out_pdata[QED_MAX_PRIV_DATA_LEN]; + union async_output async_output; +}; + +/* Endpoint structure represents a TCP connection. This connection can be + * associated with a QP or not (in which case QP==NULL) + */ +struct qed_iwarp_ep { + struct list_head list_entry; + struct qed_rdma_qp *qp; + struct qed_iwarp_ep_memory *ep_buffer_virt; + dma_addr_t ep_buffer_phys; + enum qed_iwarp_ep_state state; + int sig; + struct qed_iwarp_cm_info cm_info; + enum tcp_connect_mode connect_mode; + enum mpa_rtr_type rtr_type; + enum mpa_negotiation_mode mpa_rev; + u32 tcp_cid; + u32 cid; + u16 mss; + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + bool mpa_reply_processed; + + /* For Passive side - syn packet related data */ + u16 syn_ip_payload_length; + struct qed_iwarp_ll2_buff *syn; + dma_addr_t syn_phy_addr; + + /* The event_cb function is called for asynchrounous events associated + * with the ep. It is initialized at different entry points depending + * on whether the ep is the tcp connection active side or passive side + * The cb_context is passed to the event_cb function. + */ + iwarp_event_handler event_cb; + void *cb_context; +}; + +struct qed_iwarp_listener { + struct list_head list_entry; + + /* The event_cb function is called for connection requests. + * The cb_context is passed to the event_cb function. + */ + iwarp_event_handler event_cb; + void *cb_context; + u32 max_backlog; + u32 ip_addr[4]; + u16 port; + u16 vlan; + u8 ip_version; +}; + +int qed_iwarp_alloc(struct qed_hwfn *p_hwfn); + +int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params); + +int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn); + +void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn); + +void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_create_qp_out_params *out_params); + +int qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp, + enum qed_iwarp_qp_state new_state, bool internal); + +int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp); + +int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp); + +void qed_iwarp_query_qp(struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params); + +int +qed_iwarp_connect(void *rdma_cxt, + struct qed_iwarp_connect_in *iparams, + struct qed_iwarp_connect_out *oparams); + +int +qed_iwarp_create_listen(void *rdma_cxt, + struct qed_iwarp_listen_in *iparams, + struct qed_iwarp_listen_out *oparams); + +int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams); + +int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams); +int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle); + +int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e57699bfbdfa..0ba5ec8a9814 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -79,8 +79,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn) unsigned long **pp_qids; u32 i; - if (p_hwfn->hw_info.personality != QED_PCI_ETH && - p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_L2_PERSONALITY(p_hwfn)) return 0; p_l2_info = kzalloc(sizeof(*p_l2_info), GFP_KERNEL); @@ -1228,19 +1227,6 @@ static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode) return action; } -static void qed_set_fw_mac_addr(__le16 *fw_msb, - __le16 *fw_mid, - __le16 *fw_lsb, - u8 *mac) -{ - ((u8 *)fw_msb)[0] = mac[1]; - ((u8 *)fw_msb)[1] = mac[0]; - ((u8 *)fw_mid)[0] = mac[3]; - ((u8 *)fw_mid)[1] = mac[2]; - ((u8 *)fw_lsb)[0] = mac[5]; - ((u8 *)fw_lsb)[1] = mac[4]; -} - static int qed_filter_ucast_common(struct qed_hwfn *p_hwfn, u16 opaque_fid, diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 17f9b0a7b553..c06ad4f0758e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -309,7 +309,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) list_del(&p_pkt->list_entry); b_last_packet = list_empty(&p_tx->active_descq); list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); - if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) { struct qed_ooo_buffer *p_buffer; p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie; @@ -532,7 +532,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) list_move_tail(&p_pkt->list_entry, &p_rx->free_descq); - if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) { struct qed_ooo_buffer *p_buffer; p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie; @@ -893,11 +893,11 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg; p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en; p_ramrod->queue_id = p_ll2_conn->queue_id; - p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0 - : 1; + p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1; if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) && - p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) { + p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) && + (conn_type != QED_LL2_TYPE_IWARP)) { p_ramrod->mf_si_bcast_accept_all = 1; p_ramrod->mf_si_mcast_accept_all = 1; } else { @@ -924,7 +924,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; - if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) p_ll2_conn->tx_stats_en = 0; else p_ll2_conn->tx_stats_en = 1; @@ -955,10 +955,10 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->pbl_size = cpu_to_le16(pbl_size); switch (p_ll2_conn->input.tx_tc) { - case LB_TC: + case PURE_LB_TC: pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB); break; - case OOO_LB_TC: + case PKT_LB_TC: pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO); break; default: @@ -973,12 +973,20 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->conn_type = PROTOCOLID_FCOE; break; case QED_LL2_TYPE_ISCSI: - case QED_LL2_TYPE_ISCSI_OOO: p_ramrod->conn_type = PROTOCOLID_ISCSI; break; case QED_LL2_TYPE_ROCE: p_ramrod->conn_type = PROTOCOLID_ROCE; break; + case QED_LL2_TYPE_IWARP: + p_ramrod->conn_type = PROTOCOLID_IWARP; + break; + case QED_LL2_TYPE_OOO: + if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) + p_ramrod->conn_type = PROTOCOLID_ISCSI; + else + p_ramrod->conn_type = PROTOCOLID_IWARP; + break; default: p_ramrod->conn_type = PROTOCOLID_ETH; DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); @@ -1142,7 +1150,7 @@ qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn, u16 buf_idx; int rc = 0; - if (p_ll2_info->input.conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_info->input.conn_type != QED_LL2_TYPE_OOO) return rc; /* Correct number of requested OOO buffers if needed */ @@ -1280,7 +1288,7 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data) goto q_allocate_fail; /* Register callbacks for the Rx/Tx queues */ - if (data->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) { + if (data->input.conn_type == QED_LL2_TYPE_OOO) { comp_rx_cb = qed_ll2_lb_rxq_completion; comp_tx_cb = qed_ll2_lb_txq_completion; } else { @@ -1339,7 +1347,7 @@ static void qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn) { - if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO) return; qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); @@ -1421,7 +1429,7 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) if (rc) goto out; - if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_RDMA_PERSONALITY(p_hwfn)) qed_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1); qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn); @@ -1794,7 +1802,7 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle) qed_ll2_rxq_flush(p_hwfn, connection_handle); } - if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) { @@ -1816,7 +1824,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn, { struct qed_ooo_buffer *p_buffer; - if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO) + if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO) return; qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info); @@ -2063,7 +2071,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev, ll2_cbs.cookie = QED_LEADING_HWFN(cdev); if (lb) { - data->input.tx_tc = OOO_LB_TC; + data->input.tx_tc = PKT_LB_TC; data->input.tx_dest = QED_LL2_TX_DEST_LB; } else { data->input.tx_tc = 0; @@ -2080,7 +2088,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev, int rc; qed_ll2_set_conn_data(cdev, &data, params, - QED_LL2_TYPE_ISCSI_OOO, handle, true); + QED_LL2_TYPE_OOO, handle, true); rc = qed_ll2_acquire_connection(hwfn, &data); if (rc) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 16cc30b11cce..b11399606990 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -237,6 +237,8 @@ err0: int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info) { + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_hw_info *hw_info = &p_hwfn->hw_info; struct qed_tunnel_info *tun = &cdev->tunnel; struct qed_ptt *ptt; @@ -260,11 +262,10 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->pci_mem_start = cdev->pci_params.mem_start; dev_info->pci_mem_end = cdev->pci_params.mem_end; dev_info->pci_irq = cdev->pci_params.irq; - dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality == - QED_PCI_ETH_ROCE); + dev_info->rdma_supported = QED_IS_RDMA_PERSONALITY(p_hwfn); dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]); dev_info->dev_type = cdev->type; - ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr); + ether_addr_copy(dev_info->hw_mac, hw_info->hw_mac_addr); if (IS_PF(cdev)) { dev_info->fw_major = FW_MAJOR_VERSION; @@ -274,8 +275,7 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->mf_mode = cdev->mf_mode; dev_info->tx_switching = true; - if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support == - QED_WOL_SUPPORT_PME) + if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) dev_info->wol_support = true; dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id; @@ -304,7 +304,7 @@ int qed_fill_dev_info(struct qed_dev *cdev, &dev_info->mfw_rev, NULL); } - dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu; + dev_info->mtu = hw_info->mtu; return 0; } @@ -790,7 +790,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, cdev->num_hwfns; if (!IS_ENABLED(CONFIG_QED_RDMA) || - QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH_ROCE) + !QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) return 0; for_each_hwfn(cdev, i) @@ -931,8 +931,7 @@ static void qed_update_pf_params(struct qed_dev *cdev, /* In case we might support RDMA, don't allow qede to be greedy * with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn. */ - if (QED_LEADING_HWFN(cdev)->hw_info.personality == - QED_PCI_ETH_ROCE) { + if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) { u16 *num_cons; num_cons = ¶ms->eth_pf_params.num_cons; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index df76e212f86e..6fb99518a61f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -161,7 +161,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, NULL); - p_rdma_info->num_qps = num_cons / 2; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + p_rdma_info->num_qps = num_cons; + else + p_rdma_info->num_qps = num_cons / 2; /* 2 cids per qp */ num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE); @@ -252,6 +255,13 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, "Failed to allocate real cid bitmap, rc = %d\n", rc); goto free_cid_map; } + + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + rc = qed_iwarp_alloc(p_hwfn); + + if (rc) + goto free_cid_map; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n"); return 0; @@ -329,6 +339,9 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) { struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + qed_iwarp_resc_free(p_hwfn); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1); qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1); qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1); @@ -470,6 +483,9 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN) SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1); + + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + qed_iwarp_init_devinfo(p_hwfn); } static void qed_rdma_init_port(struct qed_hwfn *p_hwfn) @@ -490,29 +506,17 @@ static void qed_rdma_init_port(struct qed_hwfn *p_hwfn) static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 ll2_ethertype_en; + int rc = 0; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n"); p_hwfn->b_rdma_enabled_in_prs = false; - qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); - - p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; - - /* We delay writing to this reg until first cid is allocated. See - * qed_cxt_dynamic_ilt_alloc function for more details - */ - ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); - qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, - (ll2_ethertype_en | 0x01)); - - if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) { - DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n"); - return -EINVAL; - } + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + qed_iwarp_init_hw(p_hwfn, p_ptt); + else + rc = qed_roce_init_hw(p_hwfn, p_ptt); - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n"); - return 0; + return rc; } static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, @@ -544,7 +548,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, if (rc) return rc; - p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma; + else + p_ramrod = &p_ent->ramrod.roce_init_func.rdma; p_params_header = &p_ramrod->params_header; p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, @@ -641,7 +648,15 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn, if (rc) return rc; - qed_roce_setup(p_hwfn); + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + rc = qed_iwarp_setup(p_hwfn, p_ptt, params); + if (rc) + return rc; + } else { + rc = qed_roce_setup(p_hwfn); + if (rc) + return rc; + } return qed_rdma_start_fw(p_hwfn, params, p_ptt); } @@ -675,7 +690,16 @@ int qed_rdma_stop(void *rdma_cxt) qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, (ll2_ethertype_en & 0xFFFE)); - qed_roce_stop(p_hwfn); + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + rc = qed_iwarp_stop(p_hwfn, p_ptt); + if (rc) { + qed_ptt_release(p_hwfn, p_ptt); + return rc; + } + } else { + qed_roce_stop(p_hwfn); + } + qed_ptt_release(p_hwfn, p_ptt); /* Get SPQ entry */ @@ -810,7 +834,9 @@ static int qed_fill_rdma_dev_info(struct qed_dev *cdev, memset(info, 0, sizeof(*info)); - info->rdma_type = QED_RDMA_TYPE_ROCE; + info->rdma_type = QED_IS_ROCE_PERSONALITY(p_hwfn) ? + QED_RDMA_TYPE_ROCE : QED_RDMA_TYPE_IWARP; + info->user_dpm_enabled = (p_hwfn->db_bar_no_edpm == 0); qed_fill_dev_info(cdev, &info->common); @@ -1112,7 +1138,7 @@ static int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_query_qp_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - int rc; + int rc = 0; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); @@ -1138,7 +1164,10 @@ static int qed_rdma_query_qp(void *rdma_cxt, out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp; out_params->sqd_async = qp->sqd_async; - rc = qed_roce_query_qp(p_hwfn, qp, out_params); + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + qed_iwarp_query_qp(qp, out_params); + else + rc = qed_roce_query_qp(p_hwfn, qp, out_params); DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc); return rc; @@ -1151,7 +1180,10 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); - rc = qed_roce_destroy_qp(p_hwfn, qp); + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + rc = qed_iwarp_destroy_qp(p_hwfn, qp); + else + rc = qed_roce_destroy_qp(p_hwfn, qp); /* free qp params struct */ kfree(qp); @@ -1190,20 +1222,27 @@ qed_rdma_create_qp(void *rdma_cxt, return NULL; } + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + if (in_params->sq_num_pages * sizeof(struct regpair) > + IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) { + DP_NOTICE(p_hwfn->cdev, + "Sq num pages: %d exceeds maximum\n", + in_params->sq_num_pages); + return NULL; + } + if (in_params->rq_num_pages * sizeof(struct regpair) > + IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) { + DP_NOTICE(p_hwfn->cdev, + "Rq num pages: %d exceeds maximum\n", + in_params->rq_num_pages); + return NULL; + } + } + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return NULL; - rc = qed_roce_alloc_cid(p_hwfn, &qp->icid); - qp->qpid = ((0xFF << 16) | qp->icid); - - DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid); - - if (rc) { - kfree(qp); - return NULL; - } - qp->cur_state = QED_ROCE_QP_STATE_RESET; qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi); qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo); @@ -1226,6 +1265,19 @@ qed_rdma_create_qp(void *rdma_cxt, qp->e2e_flow_control_en = qp->use_srq ? false : true; qp->stats_queue = in_params->stats_queue; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + rc = qed_iwarp_create_qp(p_hwfn, qp, out_params); + qp->qpid = qp->icid; + } else { + rc = qed_roce_alloc_cid(p_hwfn, &qp->icid); + qp->qpid = ((0xFF << 16) | qp->icid); + } + + if (rc) { + kfree(qp); + return NULL; + } + out_params->icid = qp->icid; out_params->qp_id = qp->qpid; @@ -1324,7 +1376,14 @@ static int qed_rdma_modify_qp(void *rdma_cxt, qp->cur_state); } - rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params); + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + enum qed_iwarp_qp_state new_state = + qed_roce2iwarp_state(qp->cur_state); + + rc = qed_iwarp_modify_qp(p_hwfn, qp, new_state, 0); + } else { + rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params); + } DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc); return rc; @@ -1713,6 +1772,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet, .ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, .ll2_get_stats = &qed_ll2_get_stats, + .iwarp_connect = &qed_iwarp_connect, + .iwarp_create_listen = &qed_iwarp_create_listen, + .iwarp_destroy_listen = &qed_iwarp_destroy_listen, + .iwarp_accept = &qed_iwarp_accept, + .iwarp_reject = &qed_iwarp_reject, + .iwarp_send_rtr = &qed_iwarp_send_rtr, }; const struct qed_rdma_ops *qed_get_rdma_ops(void) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h index d91e5c4069a6..18ec9cbd84f5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h @@ -42,6 +42,7 @@ #include "qed.h" #include "qed_dev_api.h" #include "qed_hsi.h" +#include "qed_iwarp.h" #include "qed_roce.h" #define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) @@ -84,6 +85,7 @@ struct qed_rdma_info { struct qed_bmap qp_map; struct qed_bmap srq_map; struct qed_bmap cid_map; + struct qed_bmap tcp_cid_map; struct qed_bmap real_cid_map; struct qed_bmap dpi_map; struct qed_bmap toggle_bits; @@ -97,6 +99,7 @@ struct qed_rdma_info { u16 queue_zone_base; u16 max_queue_zones; enum protocol_type proto; + struct qed_iwarp_info iwarp; }; struct qed_rdma_qp { @@ -105,6 +108,7 @@ struct qed_rdma_qp { u32 qpid; u16 icid; enum qed_roce_qp_state cur_state; + enum qed_iwarp_qp_state iwarp_state; bool use_srq; bool signal_all; bool fmr_and_reserved_lkey; @@ -164,6 +168,7 @@ struct qed_rdma_qp { void *shared_queue; dma_addr_t shared_queue_phys_addr; + struct qed_iwarp_ep *ep; }; #if IS_ENABLED(CONFIG_QED_RDMA) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index e53adc3d009b..fb7c2d1562ae 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -1149,3 +1149,23 @@ int qed_roce_setup(struct qed_hwfn *p_hwfn) qed_roce_async_event); } +int qed_roce_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 ll2_ethertype_en; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; + + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en | 0x01)); + + if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) { + DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n"); + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 56c95fb9a26d..ab4ad8a1e2a5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -104,12 +104,17 @@ union ramrod_data { struct roce_query_qp_req_ramrod_data roce_query_qp_req; struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp; struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req; + struct roce_init_func_ramrod_data roce_init_func; struct rdma_create_cq_ramrod_data rdma_create_cq; struct rdma_destroy_cq_ramrod_data rdma_destroy_cq; struct rdma_srq_create_ramrod_data rdma_create_srq; struct rdma_srq_destroy_ramrod_data rdma_destroy_srq; struct rdma_srq_modify_ramrod_data rdma_modify_srq; - struct roce_init_func_ramrod_data roce_init_func; + struct iwarp_create_qp_ramrod_data iwarp_create_qp; + struct iwarp_tcp_offload_ramrod_data iwarp_tcp_offload; + struct iwarp_mpa_offload_ramrod_data iwarp_mpa_offload; + struct iwarp_modify_qp_ramrod_data iwarp_modify_qp; + struct iwarp_init_func_ramrod_data iwarp_init_func; struct fcoe_init_ramrod_params fcoe_init; struct fcoe_conn_offload_ramrod_params fcoe_conn_ofld; struct fcoe_conn_terminate_ramrod_params fcoe_conn_terminate; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bd0e3f157e9e..600e30e8f0be 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1409,8 +1409,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, *index = entry->index; resolved = false; } else if (removing) { - ofdpa_neigh_del(found); *index = found->index; + ofdpa_neigh_del(found); } else if (updating) { ofdpa_neigh_update(found, NULL, false); resolved = !is_zero_ether_addr(found->eth_dst); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index ad9c4ded2b90..761c518b2f92 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4172,7 +4172,7 @@ found: * recipients */ if (is_mc_recip) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); unsigned int depth, i; memset(inbuf, 0, sizeof(inbuf)); @@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); } else { efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, - MC_CMD_FILTER_OP_IN_LEN, + MC_CMD_FILTER_OP_EXT_IN_LEN, NULL, 0, rc); } } @@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx, struct efx_filter_spec *spec) { struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); struct efx_filter_spec *saved_spec; unsigned int hash, i, depth = 1; bool replacing = false; @@ -4940,7 +4940,7 @@ not_restored: static void efx_ef10_filter_table_remove(struct efx_nic *efx) { struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); struct efx_filter_spec *spec; unsigned int filter_idx; int rc; @@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, /* Insert/renew filters */ for (i = 0; i < addr_count; i++) { + EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); rc = efx_ef10_filter_insert(efx, &spec, true); @@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, } return rc; } else { - /* mark as not inserted, and carry on */ - rc = EFX_EF10_FILTER_ID_INVALID; + /* keep invalid ID, and carry on */ } + } else { + ids[i] = efx_ef10_filter_get_unsafe_id(rc); } - ids[i] = efx_ef10_filter_get_unsafe_id(rc); } if (multicast && rollback) { diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index b9422450deb8..3df872f56289 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1301,7 +1301,7 @@ static void efx_mcdi_abandon(struct efx_nic *efx) efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT); } -/* Called from falcon_process_eventq for MCDI events */ +/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */ void efx_mcdi_process_event(struct efx_channel *channel, efx_qword_t *event) { @@ -1389,8 +1389,9 @@ void efx_mcdi_process_event(struct efx_channel *channel, MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_RC)); break; default: - netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n", - code); + netif_err(efx, hw, efx->net_dev, + "Unknown MCDI event " EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); } } diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 0d230b125c6c..080428762858 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2485,7 +2485,7 @@ static int smc_drv_resume(struct device *dev) return 0; } -static struct dev_pm_ops smc_drv_pm_ops = { +static const struct dev_pm_ops smc_drv_pm_ops = { .suspend = smc_drv_suspend, .resume = smc_drv_resume, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index fffd6d5fc907..6c2d1da05588 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) { struct sunxi_priv_data *gmac = priv->plat->bsp_priv; struct device_node *node = priv->device->of_node; - int ret; + int ret, phy_interface; u32 reg, val; regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val); @@ -718,7 +718,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (gmac->variant->support_rmii) reg &= ~SYSCON_RMII_EN; - switch (priv->plat->interface) { + phy_interface = priv->plat->interface; + /* if PHY is internal, select the mode (xMII) used by the SoC */ + if (gmac->use_internal_phy) + phy_interface = gmac->variant->internal_phy; + switch (phy_interface) { case PHY_INTERFACE_MODE_MII: /* default */ break; @@ -932,7 +936,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) } plat_dat->interface = of_get_phy_mode(dev->of_node); - if (plat_dat->interface == gmac->variant->internal_phy) { + if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) { dev_info(&pdev->dev, "Will use internal PHY\n"); gmac->use_internal_phy = true; gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 471a9aa6ac94..22cf6353ba04 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -205,8 +205,8 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space) { int i; - for (i = 0; i < 22; i++) - if ((i < 9) || (i > 17)) + for (i = 0; i < 23; i++) + if ((i < 12) || (i > 17)) reg_space[DMA_BUS_MODE / 4 + i] = readl(ioaddr + DMA_BUS_MODE + i * 4); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 743170d57f62..babb39c646ff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -29,7 +29,7 @@ #include "stmmac.h" #include "dwmac_dma.h" -#define REG_SPACE_SIZE 0x1054 +#define REG_SPACE_SIZE 0x1060 #define MAC100_ETHTOOL_NAME "st_mac100" #define GMAC_ETHTOOL_NAME "st_gmac" diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c index 1562ab4151e1..56ba411421f0 100644 --- a/drivers/net/ethernet/ti/cpsw-common.c +++ b/drivers/net/ethernet/ti/cpsw-common.c @@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr) if (of_device_is_compatible(dev->of_node, "ti,dm816-emac")) return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr); - if (of_machine_is_compatible("ti,am4372")) + if (of_machine_is_compatible("ti,am43")) return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); if (of_machine_is_compatible("ti,dra7")) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b7a0f5eeab62..1850e348f555 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1236,6 +1236,7 @@ static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv, { struct cpsw_common *cpsw = priv->cpsw; + skb_tx_timestamp(skb); return cpdma_chan_submit(txch, skb, skb->data, skb->len, priv->emac_port + cpsw->data.dual_emac); } @@ -1597,6 +1598,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; + struct cpts *cpts = cpsw->cpts; struct netdev_queue *txq; struct cpdma_chan *txch; int ret, q_idx; @@ -1608,11 +1610,9 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, } if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - cpts_is_tx_enabled(cpsw->cpts)) + cpts_is_tx_enabled(cpts) && cpts_can_timestamp(cpts, skb)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - skb_tx_timestamp(skb); - q_idx = skb_get_queue_mapping(skb); if (q_idx >= cpsw->tx_ch_num) q_idx = q_idx % cpsw->tx_ch_num; diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h index c96eca2b1b46..01ea82ba9cdc 100644 --- a/drivers/net/ethernet/ti/cpts.h +++ b/drivers/net/ethernet/ti/cpts.h @@ -30,6 +30,7 @@ #include <linux/of.h> #include <linux/ptp_clock_kernel.h> #include <linux/skbuff.h> +#include <linux/ptp_classify.h> #include <linux/timecounter.h> struct cpsw_cpts { @@ -155,6 +156,16 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts) return !!cpts->tx_enable; } +static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb) +{ + unsigned int class = ptp_classify_raw(skb); + + if (class == PTP_CLASS_NONE) + return false; + + return true; +} + #else struct cpts; @@ -203,6 +214,11 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts) { return false; } + +static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb) +{ + return false; +} #endif diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 0847a8f48cfe..28cb38af1a34 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2503,24 +2503,8 @@ static bool gbe_need_txtstamp(struct gbe_intf *gbe_intf, const struct netcp_packet *p_info) { struct sk_buff *skb = p_info->skb; - unsigned int class = ptp_classify_raw(skb); - if (class == PTP_CLASS_NONE) - return false; - - switch (class) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case (PTP_CLASS_V2_VLAN | PTP_CLASS_L2): - case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV4): - case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV6): - return true; - } - - return false; + return cpts_can_timestamp(gbe_intf->gbe_dev->cpts, skb); } static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf, diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index eb77201cb718..de8156c6b292 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -45,9 +45,17 @@ struct geneve_net { static unsigned int geneve_net_id; +struct geneve_dev_node { + struct hlist_node hlist; + struct geneve_dev *geneve; +}; + /* Pseudo network device */ struct geneve_dev { - struct hlist_node hlist; /* vni hash table */ + struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ struct ip_tunnel_info info; @@ -123,16 +131,16 @@ static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { struct hlist_head *vni_list_head; - struct geneve_dev *geneve; + struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; - hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { - if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) && - addr == geneve->info.key.u.ipv4.dst) - return geneve; + hlist_for_each_entry_rcu(node, vni_list_head, hlist) { + if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) && + addr == node->geneve->info.key.u.ipv4.dst) + return node->geneve; } return NULL; } @@ -142,16 +150,16 @@ static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, struct in6_addr addr6, u8 vni[]) { struct hlist_head *vni_list_head; - struct geneve_dev *geneve; + struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; - hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { - if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) && - ipv6_addr_equal(&addr6, &geneve->info.key.u.ipv6.dst)) - return geneve; + hlist_for_each_entry_rcu(node, vni_list_head, hlist) { + if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) && + ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst)) + return node->geneve; } return NULL; } @@ -591,6 +599,7 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) { struct net *net = geneve->net; struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev_node *node; struct geneve_sock *gs; __u8 vni[3]; __u32 hash; @@ -609,15 +618,20 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) out: gs->collect_md = geneve->collect_md; #if IS_ENABLED(CONFIG_IPV6) - if (ipv6) + if (ipv6) { rcu_assign_pointer(geneve->sock6, gs); - else + node = &geneve->hlist6; + } else #endif + { rcu_assign_pointer(geneve->sock4, gs); + node = &geneve->hlist4; + } + node->geneve = geneve; tunnel_id_to_vni(geneve->info.key.tun_id, vni); hash = geneve_net_vni_hash(vni); - hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); + hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); return 0; } @@ -644,8 +658,10 @@ static int geneve_stop(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); - if (!hlist_unhashed(&geneve->hlist)) - hlist_del_rcu(&geneve->hlist); + hlist_del_init_rcu(&geneve->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&geneve->hlist6.hlist); +#endif geneve_sock_release(geneve); return 0; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 991372150463..63c98bbbc596 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -752,7 +752,7 @@ static int netvsc_set_channels(struct net_device *net, channels->rx_count || channels->tx_count || channels->other_count) return -EINVAL; - if (count > net->num_tx_queues || count > net->num_rx_queues) + if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX) return -EINVAL; if (!nvdev || nvdev->destroy) @@ -1179,7 +1179,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - if (indir[i] >= dev->num_rx_queues) + if (indir[i] >= VRSS_CHANNEL_MAX) return -EINVAL; for (i = 0; i < ITAB_NUM; i++) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9ffff0362a11..0f581ee74fe4 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -39,16 +39,20 @@ #define MACVLAN_HASH_SIZE (1<<MACVLAN_HASH_BITS) #define MACVLAN_BC_QUEUE_LEN 1000 +#define MACVLAN_F_PASSTHRU 1 +#define MACVLAN_F_ADDRCHANGE 2 + struct macvlan_port { struct net_device *dev; struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; struct list_head vlans; struct sk_buff_head bc_queue; struct work_struct bc_work; - bool passthru; + u32 flags; int count; struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE]; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); + unsigned char perm_addr[ETH_ALEN]; }; struct macvlan_source_entry { @@ -66,6 +70,31 @@ struct macvlan_skb_cb { static void macvlan_port_destroy(struct net_device *dev); +static inline bool macvlan_passthru(const struct macvlan_port *port) +{ + return port->flags & MACVLAN_F_PASSTHRU; +} + +static inline void macvlan_set_passthru(struct macvlan_port *port) +{ + port->flags |= MACVLAN_F_PASSTHRU; +} + +static inline bool macvlan_addr_change(const struct macvlan_port *port) +{ + return port->flags & MACVLAN_F_ADDRCHANGE; +} + +static inline void macvlan_set_addr_change(struct macvlan_port *port) +{ + port->flags |= MACVLAN_F_ADDRCHANGE; +} + +static inline void macvlan_clear_addr_change(struct macvlan_port *port) +{ + port->flags &= ~MACVLAN_F_ADDRCHANGE; +} + /* Hash Ethernet address */ static u32 macvlan_eth_hash(const unsigned char *addr) { @@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan, static bool macvlan_addr_busy(const struct macvlan_port *port, const unsigned char *addr) { - /* Test to see if the specified multicast address is + /* Test to see if the specified address is * currently in use by the underlying device or * another macvlan. */ - if (ether_addr_equal_64bits(port->dev->dev_addr, addr)) + if (!macvlan_passthru(port) && !macvlan_addr_change(port) && + ether_addr_equal_64bits(port->dev->dev_addr, addr)) return true; if (macvlan_hash_lookup(port, addr)) @@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) } macvlan_forward_source(skb, port, eth->h_source); - if (port->passthru) + if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); else @@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev) struct net_device *lowerdev = vlan->lowerdev; int err; - if (vlan->port->passthru) { + if (macvlan_passthru(vlan->port)) { if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) { err = dev_set_promiscuity(lowerdev, 1); if (err < 0) @@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev) dev_uc_unsync(lowerdev, dev); dev_mc_unsync(lowerdev, dev); - if (vlan->port->passthru) { + if (macvlan_passthru(vlan->port)) { if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) dev_set_promiscuity(lowerdev, -1); goto hash_del; @@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; + struct macvlan_port *port = vlan->port; int err; if (!(dev->flags & IFF_UP)) { @@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) if (macvlan_addr_busy(vlan->port, addr)) return -EBUSY; - if (!vlan->port->passthru) { + if (!macvlan_passthru(port)) { err = dev_uc_add(lowerdev, addr); if (err) return err; @@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr) macvlan_hash_change_addr(vlan, addr); } + if (macvlan_passthru(port) && !macvlan_addr_change(port)) { + /* Since addr_change isn't set, we are here due to lower + * device change. Save the lower-dev address so we can + * restore it later. + */ + ether_addr_copy(vlan->port->perm_addr, + lowerdev->dev_addr); + } + macvlan_clear_addr_change(port); return 0; } @@ -703,8 +743,14 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (vlan->mode == MACVLAN_MODE_PASSTHRU) + /* If the addresses are the same, this is a no-op */ + if (ether_addr_equal(dev->dev_addr, addr->sa_data)) + return 0; + + if (vlan->mode == MACVLAN_MODE_PASSTHRU) { + macvlan_set_addr_change(vlan->port); return dev_set_mac_address(vlan->lowerdev, addr); + } return macvlan_sync_address(dev, addr->sa_data); } @@ -926,7 +972,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* Support unicast filter only on passthru devices. * Multicast filter should be allowed on all devices. */ - if (!vlan->port->passthru && is_unicast_ether_addr(addr)) + if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr)) return -EOPNOTSUPP; if (flags & NLM_F_REPLACE) @@ -950,7 +996,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], /* Support unicast filter only on passthru devices. * Multicast filter should be allowed on all devices. */ - if (!vlan->port->passthru && is_unicast_ether_addr(addr)) + if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr)) return -EOPNOTSUPP; if (is_unicast_ether_addr(addr)) @@ -1118,8 +1164,8 @@ static int macvlan_port_create(struct net_device *dev) if (port == NULL) return -ENOMEM; - port->passthru = false; port->dev = dev; + ether_addr_copy(port->perm_addr, dev->dev_addr); INIT_LIST_HEAD(&port->vlans); for (i = 0; i < MACVLAN_HASH_SIZE; i++) INIT_HLIST_HEAD(&port->vlan_hash[i]); @@ -1159,6 +1205,18 @@ static void macvlan_port_destroy(struct net_device *dev) kfree_skb(skb); } + /* If the lower device address has been changed by passthru + * macvlan, put it back. + */ + if (macvlan_passthru(port) && + !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) { + struct sockaddr sa; + + sa.sa_family = port->dev->type; + memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len); + dev_set_mac_address(port->dev, &sa); + } + kfree(port); } @@ -1325,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ - if (port->passthru) { + if (macvlan_passthru(port)) { /* The macvlan port must be not created this time, * still goto destroy_macvlan_port for readability. */ @@ -1351,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, err = -EINVAL; goto destroy_macvlan_port; } - port->passthru = true; + macvlan_set_passthru(port); eth_hw_addr_inherit(dev, lowerdev); } @@ -1435,7 +1493,7 @@ static int macvlan_changelink(struct net_device *dev, if (data && data[IFLA_MACVLAN_FLAGS]) { __u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC; - if (vlan->port->passthru && promisc) { + if (macvlan_passthru(vlan->port) && promisc) { int err; if (flags & MACVLAN_FLAG_NOPROMISC) @@ -1598,7 +1656,7 @@ static int macvlan_device_event(struct notifier_block *unused, } break; case NETDEV_CHANGEADDR: - if (!port->passthru) + if (!macvlan_passthru(port)) return NOTIFY_DONE; vlan = list_first_entry_or_null(&port->vlans, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index ed0d10f54f26..c3065236ffcc 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640, if (overflow) { pr_debug("tx timestamp queue overflow, count %d\n", overflow); while (skb) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); skb = skb_dequeue(&dp83640->tx_queue); } return; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 8400403b3f62..5d314f143aea 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -2171,6 +2171,7 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, + .set_loopback = genphy_loopback, }, { .phy_id = MARVELL_PHY_ID_88E1540, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 9365b0792309..fdb43dd9b5cd 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -620,6 +620,8 @@ static int ksz9031_read_status(struct phy_device *phydev) if ((regval & 0xFF) == 0xFF) { phy_init_hw(phydev); phydev->link = 0; + if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) + phydev->drv->config_intr(phydev); } return 0; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index acf00f071c9a..1790f7fec125 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1136,6 +1136,39 @@ int phy_resume(struct phy_device *phydev) } EXPORT_SYMBOL(phy_resume); +int phy_loopback(struct phy_device *phydev, bool enable) +{ + struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); + int ret = 0; + + mutex_lock(&phydev->lock); + + if (enable && phydev->loopback_enabled) { + ret = -EBUSY; + goto out; + } + + if (!enable && !phydev->loopback_enabled) { + ret = -EINVAL; + goto out; + } + + if (phydev->drv && phydrv->set_loopback) + ret = phydrv->set_loopback(phydev, enable); + else + ret = -EOPNOTSUPP; + + if (ret) + goto out; + + phydev->loopback_enabled = enable; + +out: + mutex_unlock(&phydev->lock); + return ret; +} +EXPORT_SYMBOL(phy_loopback); + /* Generic PHY support and helper functions */ /** @@ -1584,6 +1617,23 @@ int genphy_resume(struct phy_device *phydev) } EXPORT_SYMBOL(genphy_resume); +int genphy_loopback(struct phy_device *phydev, bool enable) +{ + int value; + + value = phy_read(phydev, MII_BMCR); + if (value < 0) + return value; + + if (enable) + value |= BMCR_LOOPBACK; + else + value &= ~BMCR_LOOPBACK; + + return phy_write(phydev, MII_BMCR, value); +} +EXPORT_SYMBOL(genphy_loopback); + static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) { /* The default values for phydev->supported are provided by the PHY @@ -1829,6 +1879,7 @@ static struct phy_driver genphy_driver = { .read_status = genphy_read_status, .suspend = genphy_suspend, .resume = genphy_resume, + .set_loopback = genphy_loopback, }; static int __init phy_init(void) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 300bb1479b3a..e9f101c9bae2 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -201,7 +201,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) rionet_queue_tx_msg(skb, ndev, nets[rnet->mport->id].active[i]); if (count) - atomic_inc(&skb->users); + refcount_inc(&skb->users); count++; } } else if (RIONET_MAC_MATCH(eth->h_dest)) { diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 793ce900dffa..f32261ecd215 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1725,6 +1725,18 @@ static const struct driver_info lenovo_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info belkin_info = { + .description = "Belkin USB Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct usb_device_id products[] = { { /* ASIX AX88179 10/100/1000 */ @@ -1754,6 +1766,10 @@ static const struct usb_device_id products[] = { /* Lenovo OneLinkDock Gigabit LAN */ USB_DEVICE(0x17ef, 0x304b), .driver_info = (unsigned long)&lenovo_info, +}, { + /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */ + USB_DEVICE(0x050d, 0x0128), + .driver_info = (unsigned long)&belkin_info, }, { }, }; diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 18fa45fc979b..7220cd620717 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -643,6 +643,13 @@ static const struct usb_device_id mbim_devs[] = { .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end, }, + /* The HP lt4132 (03f0:a31d) is a rebranded Huawei ME906s-158, + * therefore it too requires the above "NDP to end" quirk. + */ + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end, + }, + /* Telit LE922A6 in MBIM composition */ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1041, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2067743f51ca..d103a1d4fb36 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -89,6 +89,8 @@ static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = { CDC_NCM_SIMPLE_STAT(rx_ntbs), }; +#define CDC_NCM_LOW_MEM_MAX_CNT 10 + static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset) { switch (sset) { @@ -1055,10 +1057,10 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ /* align new NDP */ if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) - cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); + cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size); /* verify that there is room for the NDP and the datagram (reserve) */ - if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) + if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size) return NULL; /* link to it */ @@ -1111,13 +1113,41 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* allocate a new OUT skb */ if (!skb_out) { - skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC); + if (ctx->tx_low_mem_val == 0) { + ctx->tx_curr_size = ctx->tx_max; + skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC); + /* If the memory allocation fails we will wait longer + * each time before attempting another full size + * allocation again to not overload the system + * further. + */ + if (skb_out == NULL) { + ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1, + (unsigned)CDC_NCM_LOW_MEM_MAX_CNT); + ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt; + } + } if (skb_out == NULL) { - if (skb != NULL) { - dev_kfree_skb_any(skb); - dev->net->stats.tx_dropped++; + /* See if a very small allocation is possible. + * We will send this packet immediately and hope + * that there is more memory available later. + */ + if (skb) + ctx->tx_curr_size = max(skb->len, + (u32)USB_CDC_NCM_NTB_MIN_OUT_SIZE); + else + ctx->tx_curr_size = USB_CDC_NCM_NTB_MIN_OUT_SIZE; + skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC); + + /* No allocation possible so we will abort */ + if (skb_out == NULL) { + if (skb != NULL) { + dev_kfree_skb_any(skb); + dev->net->stats.tx_dropped++; + } + goto exit_no_skb; } - goto exit_no_skb; + ctx->tx_low_mem_val--; } /* fill out the initial 16-bit NTB header */ nth16 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth16)); @@ -1148,10 +1178,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) ndp16 = cdc_ncm_ndp(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder); /* align beginning of next frame */ - cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max); + cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_curr_size); /* check if we had enough room left for both NDP and frame */ - if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) { + if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_curr_size) { if (n == 0) { /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); @@ -1227,7 +1257,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* If requested, put NDP at end of frame. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; - cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max); + cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size); nth16->wNdpIndex = cpu_to_le16(skb_out->len); skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size); @@ -1246,9 +1276,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) */ if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && skb_out->len > ctx->min_tx_pkt) { - padding_count = ctx->tx_max - skb_out->len; + padding_count = ctx->tx_curr_size - skb_out->len; skb_put_zero(skb_out, padding_count); - } else if (skb_out->len < ctx->tx_max && + } else if (skb_out->len < ctx->tx_curr_size && (skb_out->len % dev->maxpacket) == 0) { skb_put_u8(skb_out, 0); /* force short packet */ } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index b33553b1e19c..f5438d0978ca 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -385,7 +385,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, tbp = tb; } - if (tbp[IFLA_IFNAME]) { + if (ifmp && tbp[IFLA_IFNAME]) { nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { @@ -404,7 +404,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, return PTR_ERR(peer); } - if (tbp[IFLA_ADDRESS] == NULL) + if (!ifmp || !tbp[IFLA_ADDRESS]) eth_hw_addr_random(peer); if (ifmp && (dev->ifindex != 0)) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5c6388fb7dd1..2e69bcdc5b07 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1802,6 +1802,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev) flush_work(&vi->config_work); netif_device_detach(vi->dev); + netif_tx_disable(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0dafd8e6c665..b04e103350fb 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -229,25 +229,25 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex, __be32 vni) { - struct vxlan_dev *vxlan; + struct vxlan_dev_node *node; /* For flow based devices, map all packets to VNI 0 */ if (vs->flags & VXLAN_F_COLLECT_METADATA) vni = 0; - hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { - if (vxlan->default_dst.remote_vni != vni) + hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) { + if (node->vxlan->default_dst.remote_vni != vni) continue; if (IS_ENABLED(CONFIG_IPV6)) { - const struct vxlan_config *cfg = &vxlan->cfg; + const struct vxlan_config *cfg = &node->vxlan->cfg; if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) && cfg->remote_ifindex != ifindex) continue; } - return vxlan; + return node->vxlan; } return NULL; @@ -2387,17 +2387,22 @@ static void vxlan_vs_del_dev(struct vxlan_dev *vxlan) struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); spin_lock(&vn->sock_lock); - hlist_del_init_rcu(&vxlan->hlist); + hlist_del_init_rcu(&vxlan->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) + hlist_del_init_rcu(&vxlan->hlist6.hlist); +#endif spin_unlock(&vn->sock_lock); } -static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) +static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan, + struct vxlan_dev_node *node) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); __be32 vni = vxlan->default_dst.remote_vni; + node->vxlan = vxlan; spin_lock(&vn->sock_lock); - hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); + hlist_add_head_rcu(&node->hlist, vni_head(vs, vni)); spin_unlock(&vn->sock_lock); } @@ -2656,7 +2661,6 @@ static void vxlan_setup(struct net_device *dev) vxlan->age_timer.data = (unsigned long) vxlan; vxlan->dev = dev; - vxlan->net = dev_net(dev); gro_cells_init(&vxlan->gro_cells, dev); @@ -2727,7 +2731,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], } if (tb[IFLA_MTU]) { - u32 mtu = nla_get_u32(data[IFLA_MTU]); + u32 mtu = nla_get_u32(tb[IFLA_MTU]); if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) return -EINVAL; @@ -2850,6 +2854,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_sock *vs = NULL; + struct vxlan_dev_node *node; if (!vxlan->cfg.no_share) { spin_lock(&vn->sock_lock); @@ -2867,12 +2872,16 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) if (IS_ERR(vs)) return PTR_ERR(vs); #if IS_ENABLED(CONFIG_IPV6) - if (ipv6) + if (ipv6) { rcu_assign_pointer(vxlan->vn6_sock, vs); - else + node = &vxlan->hlist6; + } else #endif + { rcu_assign_pointer(vxlan->vn4_sock, vs); - vxlan_vs_add_dev(vs, vxlan); + node = &vxlan->hlist4; + } + vxlan_vs_add_dev(vs, vxlan, node); return 0; } @@ -3028,7 +3037,9 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, static void vxlan_config_apply(struct net_device *dev, struct vxlan_config *conf, - struct net_device *lowerdev, bool changelink) + struct net_device *lowerdev, + struct net *src_net, + bool changelink) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *dst = &vxlan->default_dst; @@ -3044,6 +3055,8 @@ static void vxlan_config_apply(struct net_device *dev, if (conf->mtu) dev->mtu = conf->mtu; + + vxlan->net = src_net; } dst->remote_vni = conf->vni; @@ -3086,7 +3099,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, if (ret) return ret; - vxlan_config_apply(dev, conf, lowerdev, changelink); + vxlan_config_apply(dev, conf, lowerdev, src_net, changelink); return 0; } diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 530586be05b4..5b1d2e8402d9 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned long remaining_credit; struct timer_list credit_timeout; u64 credit_window_start; + bool rate_limited; /* Statistics */ struct xenvif_stats stats; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 8397f6c92451..e322a862ddfe 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); - xenvif_napi_schedule_or_enable_events(queue); + /* If the queue is rate-limited, it shall be + * rescheduled in the timer callback. + */ + if (likely(!queue->rate_limited)) + xenvif_napi_schedule_or_enable_events(queue); } return work_done; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 602d408fa25e..5042ff8d449a 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue) max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ queue->remaining_credit = min(max_credit, max_burst); + queue->rate_limited = false; } void xenvif_tx_credit_callback(unsigned long data) @@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) msecs_to_jiffies(queue->credit_usec / 1000); /* Timer could already be pending in rare cases. */ - if (timer_pending(&queue->credit_timeout)) + if (timer_pending(&queue->credit_timeout)) { + queue->rate_limited = true; return true; + } /* Passed the point where we can replenish credit? */ if (time_after_eq64(now, next_credit)) { @@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) mod_timer(&queue->credit_timeout, next_credit); queue->credit_window_start = next_credit; + queue->rate_limited = true; return true; } diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index c4208487fadc..b065eb605215 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -7,7 +7,7 @@ menu "Near Field Communication (NFC) devices" config NFC_TRF7970A tristate "Texas Instruments TRF7970a NFC driver" - depends on SPI && NFC_DIGITAL + depends on SPI && NFC_DIGITAL && GPIOLIB help This option enables the NFC driver for Texas Instruments' TRF7970a device. Such device supports 5 different protocols: ISO14443A, diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index badd8167ac73..ec50027b0d8b 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -749,11 +749,9 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, u32 protocols; int r; - info = kzalloc(sizeof(struct fdp_nci_info), GFP_KERNEL); - if (!info) { - r = -ENOMEM; - goto err_info_alloc; - } + info = devm_kzalloc(dev, sizeof(struct fdp_nci_info), GFP_KERNEL); + if (!info) + return -ENOMEM; info->phy = phy; info->phy_ops = phy_ops; @@ -775,8 +773,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, tx_tailroom); if (!ndev) { nfc_err(dev, "Cannot allocate nfc ndev\n"); - r = -ENOMEM; - goto err_alloc_ndev; + return -ENOMEM; } r = nci_register_device(ndev); @@ -792,9 +789,6 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops, err_regdev: nci_free_device(ndev); -err_alloc_ndev: - kfree(info); -err_info_alloc: return r; } EXPORT_SYMBOL(fdp_nci_probe); @@ -808,7 +802,6 @@ void fdp_nci_remove(struct nci_dev *ndev) nci_unregister_device(ndev); nci_free_device(ndev); - kfree(info); } EXPORT_SYMBOL(fdp_nci_remove); diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index e0baec848ff2..c4da50e07bbc 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -27,7 +27,6 @@ #define FDP_I2C_DRIVER_NAME "fdp_nci_i2c" -#define FDP_DP_POWER_GPIO_NAME "power" #define FDP_DP_CLOCK_TYPE_NAME "clock-type" #define FDP_DP_CLOCK_FREQ_NAME "clock-freq" #define FDP_DP_FW_VSC_CFG_NAME "fw-vsc-cfg" @@ -281,8 +280,14 @@ vsc_read_err: *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no"); } -static int fdp_nci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static const struct acpi_gpio_params power_gpios = { 0, 0, false }; + +static const struct acpi_gpio_mapping acpi_fdp_gpios[] = { + { "power-gpios", &power_gpios, 1 }, + {}, +}; + +static int fdp_nci_i2c_probe(struct i2c_client *client) { struct fdp_i2c_phy *phy; struct device *dev = &client->dev; @@ -304,8 +309,7 @@ static int fdp_nci_i2c_probe(struct i2c_client *client, return -ENODEV; } - phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy), - GFP_KERNEL); + phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy), GFP_KERNEL); if (!phy) return -ENOMEM; @@ -313,19 +317,22 @@ static int fdp_nci_i2c_probe(struct i2c_client *client, phy->next_read_size = FDP_NCI_I2C_MIN_PAYLOAD; i2c_set_clientdata(client, phy); - r = request_threaded_irq(client->irq, NULL, fdp_nci_i2c_irq_thread_fn, - IRQF_TRIGGER_RISING | IRQF_ONESHOT, - FDP_I2C_DRIVER_NAME, phy); + r = devm_request_threaded_irq(dev, client->irq, + NULL, fdp_nci_i2c_irq_thread_fn, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + FDP_I2C_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); return r; } - /* Requesting the power gpio */ - phy->power_gpio = devm_gpiod_get(dev, FDP_DP_POWER_GPIO_NAME, - GPIOD_OUT_LOW); + r = devm_acpi_dev_add_driver_gpios(dev, acpi_fdp_gpios); + if (r) + dev_dbg(dev, "Unable to add GPIO mapping table\n"); + /* Requesting the power gpio */ + phy->power_gpio = devm_gpiod_get(dev, "power", GPIOD_OUT_LOW); if (IS_ERR(phy->power_gpio)) { nfc_err(dev, "Power GPIO request failed\n"); return PTR_ERR(phy->power_gpio); @@ -360,12 +367,6 @@ static int fdp_nci_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id fdp_nci_i2c_id_table[] = { - {"int339a", 0}, - {} -}; -MODULE_DEVICE_TABLE(i2c, fdp_nci_i2c_id_table); - static const struct acpi_device_id fdp_nci_i2c_acpi_match[] = { {"INT339A", 0}, {} @@ -377,8 +378,7 @@ static struct i2c_driver fdp_nci_i2c_driver = { .name = FDP_I2C_DRIVER_NAME, .acpi_match_table = ACPI_PTR(fdp_nci_i2c_acpi_match), }, - .id_table = fdp_nci_i2c_id_table, - .probe = fdp_nci_i2c_probe, + .probe_new = fdp_nci_i2c_probe, .remove = fdp_nci_i2c_remove, }; module_i2c_driver(fdp_nci_i2c_driver); diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index f9f000c546d1..7f8960a46aab 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -457,7 +457,7 @@ int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv) INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work); snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq", - dev_name(priv->dev)); + dev_name(&priv->ndev->nfc_dev->dev)); priv->fw_dnld.rx_wq = create_singlethread_workqueue(name); if (!priv->fw_dnld.rx_wq) return -ENOMEM; @@ -494,6 +494,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld; + int res; if (!priv->support_fw_dnld) return -ENOTSUPP; @@ -509,7 +510,9 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) */ /* Retrieve FW binary */ - if (request_firmware(&fw_dnld->fw, firmware_name, priv->dev) < 0) { + res = request_firmware(&fw_dnld->fw, firmware_name, + &ndev->nfc_dev->dev); + if (res < 0) { nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name); return -ENOENT; } diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index c5038e6447bd..e65d027b91fa 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -123,13 +123,14 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, memcpy(&priv->config, pdata, sizeof(*pdata)); - if (priv->config.reset_n_io) { - rc = devm_gpio_request_one(dev, - priv->config.reset_n_io, - GPIOF_OUT_INIT_LOW, - "nfcmrvl_reset_n"); - if (rc < 0) + if (gpio_is_valid(priv->config.reset_n_io)) { + rc = gpio_request_one(priv->config.reset_n_io, + GPIOF_OUT_INIT_LOW, + "nfcmrvl_reset_n"); + if (rc < 0) { + priv->config.reset_n_io = -EINVAL; nfc_err(dev, "failed to request reset_n io\n"); + } } if (phy == NFCMRVL_PHY_SPI) { @@ -154,7 +155,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, if (!priv->ndev) { nfc_err(dev, "nci_allocate_device failed\n"); rc = -ENOMEM; - goto error; + goto error_free_gpio; + } + + rc = nfcmrvl_fw_dnld_init(priv); + if (rc) { + nfc_err(dev, "failed to initialize FW download %d\n", rc); + goto error_free_dev; } nci_set_drvdata(priv->ndev, priv); @@ -162,24 +169,22 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, rc = nci_register_device(priv->ndev); if (rc) { nfc_err(dev, "nci_register_device failed %d\n", rc); - goto error_free_dev; + goto error_fw_dnld_deinit; } /* Ensure that controller is powered off */ nfcmrvl_chip_halt(priv); - rc = nfcmrvl_fw_dnld_init(priv); - if (rc) { - nfc_err(dev, "failed to initialize FW download %d\n", rc); - goto error_free_dev; - } - nfc_info(dev, "registered with nci successfully\n"); return priv; +error_fw_dnld_deinit: + nfcmrvl_fw_dnld_deinit(priv); error_free_dev: nci_free_device(priv->ndev); -error: +error_free_gpio: + if (gpio_is_valid(priv->config.reset_n_io)) + gpio_free(priv->config.reset_n_io); kfree(priv); return ERR_PTR(rc); } @@ -194,8 +199,8 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) nfcmrvl_fw_dnld_deinit(priv); - if (priv->config.reset_n_io) - devm_gpio_free(priv->dev, priv->config.reset_n_io); + if (gpio_is_valid(priv->config.reset_n_io)) + gpio_free(priv->config.reset_n_io); nci_unregister_device(ndev); nci_free_device(ndev); @@ -262,7 +267,6 @@ int nfcmrvl_parse_dt(struct device_node *node, reset_n_io = of_get_named_gpio(node, "reset-n-io", 0); if (reset_n_io < 0) { pr_info("no reset-n-io config\n"); - reset_n_io = 0; } else if (!gpio_is_valid(reset_n_io)) { pr_err("invalid reset-n-io GPIO\n"); return reset_n_io; diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c index 83a99e38e7bd..91162f8e0366 100644 --- a/drivers/nfc/nfcmrvl/uart.c +++ b/drivers/nfc/nfcmrvl/uart.c @@ -84,6 +84,7 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node, ret = nfcmrvl_parse_dt(matched_node, pdata); if (ret < 0) { pr_err("Failed to get generic entries\n"); + of_node_put(matched_node); return ret; } @@ -97,6 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node, else pdata->break_control = 0; + of_node_put(matched_node); + return 0; } @@ -109,6 +112,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) struct nfcmrvl_private *priv; struct nfcmrvl_platform_data *pdata = NULL; struct nfcmrvl_platform_data config; + struct device *dev = nu->tty->dev; /* * Platform data cannot be used here since usually it is already used @@ -116,9 +120,8 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) * and check if DT entries were added. */ - if (nu->tty->dev->parent && nu->tty->dev->parent->of_node) - if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node, - &config) == 0) + if (dev && dev->parent && dev->parent->of_node) + if (nfcmrvl_uart_parse_dt(dev->parent->of_node, &config) == 0) pdata = &config; if (!pdata) { @@ -131,7 +134,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu) } priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_UART, nu, &uart_ops, - nu->tty->dev, pdata); + dev, pdata); if (IS_ERR(priv)) return PTR_ERR(priv); diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index 699aa9d16575..bd35eab652be 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -341,15 +341,13 @@ static int nfcmrvl_probe(struct usb_interface *intf, init_usb_anchor(&drv_data->deferred); priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_USB, drv_data, &usb_ops, - &drv_data->udev->dev, &config); + &intf->dev, &config); if (IS_ERR(priv)) return PTR_ERR(priv); drv_data->priv = priv; drv_data->priv->support_fw_dnld = false; - priv->dev = &drv_data->udev->dev; - usb_set_intfdata(intf, drv_data); return 0; diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index a466e7978466..33449820e754 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -482,8 +482,10 @@ static int __init nfcsim_init(void) exit_err: pr_err("Failed to initialize nfcsim driver (%d)\n", rc); - nfcsim_link_free(link0); - nfcsim_link_free(link1); + if (link0) + nfcsim_link_free(link0); + if (link1) + nfcsim_link_free(link1); return rc; } diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index fedde9d46ab6..4b14740edb67 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -904,7 +904,7 @@ static int pn544_hci_i2c_probe(struct i2c_client *client, phy->i2c_dev = client; i2c_set_clientdata(client, phy); - r = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), acpi_pn544_gpios); + r = devm_acpi_dev_add_driver_gpios(dev, acpi_pn544_gpios); if (r) dev_dbg(dev, "Unable to add GPIO mapping table\n"); @@ -958,7 +958,6 @@ static int pn544_hci_i2c_remove(struct i2c_client *client) if (phy->powered) pn544_hci_i2c_disable(phy); - acpi_dev_remove_driver_gpios(ACPI_COMPANION(&client->dev)); return 0; } diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index 9dfae0efa922..515f08d037fb 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -19,15 +19,12 @@ #include <linux/module.h> #include <linux/i2c.h> -#include <linux/gpio.h> #include <linux/gpio/consumer.h> -#include <linux/of_irq.h> -#include <linux/of_gpio.h> #include <linux/acpi.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/nfc.h> -#include <linux/platform_data/st-nci.h> +#include <linux/of.h> #include "st-nci.h" @@ -40,18 +37,16 @@ #define ST_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ #define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */ +#define ST_NCI_DRIVER_NAME "st_nci" #define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c" -#define ST_NCI_GPIO_NAME_RESET "reset" - struct st_nci_i2c_phy { struct i2c_client *i2c_dev; struct llt_ndlc *ndlc; bool irq_active; - unsigned int gpio_reset; - unsigned int irq_polarity; + struct gpio_desc *gpiod_reset; struct st_nci_se_status se_status; }; @@ -60,9 +55,9 @@ static int st_nci_i2c_enable(void *phy_id) { struct st_nci_i2c_phy *phy = phy_id; - gpio_set_value(phy->gpio_reset, 0); + gpiod_set_value(phy->gpiod_reset, 0); usleep_range(10000, 15000); - gpio_set_value(phy->gpio_reset, 1); + gpiod_set_value(phy->gpiod_reset, 1); usleep_range(80000, 85000); if (phy->ndlc->powered == 0 && phy->irq_active == 0) { @@ -208,114 +203,18 @@ static struct nfc_phy_ops i2c_phy_ops = { .disable = st_nci_i2c_disable, }; -static int st_nci_i2c_acpi_request_resources(struct i2c_client *client) -{ - struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); - struct gpio_desc *gpiod_reset; - struct device *dev = &client->dev; - u8 tmp; - - /* Get RESET GPIO from ACPI */ - gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1, - GPIOD_OUT_HIGH); - if (IS_ERR(gpiod_reset)) { - nfc_err(dev, "Unable to get RESET GPIO\n"); - return -ENODEV; - } - - phy->gpio_reset = desc_to_gpio(gpiod_reset); - - phy->irq_polarity = irq_get_trigger_type(client->irq); - - phy->se_status.is_ese_present = false; - phy->se_status.is_uicc_present = false; - - if (device_property_present(dev, "ese-present")) { - device_property_read_u8(dev, "ese-present", &tmp); - phy->se_status.is_ese_present = tmp; - } - - if (device_property_present(dev, "uicc-present")) { - device_property_read_u8(dev, "uicc-present", &tmp); - phy->se_status.is_uicc_present = tmp; - } - - return 0; -} - -static int st_nci_i2c_of_request_resources(struct i2c_client *client) -{ - struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); - struct device_node *pp; - int gpio; - int r; - - pp = client->dev.of_node; - if (!pp) - return -ENODEV; - - /* Get GPIO from device tree */ - gpio = of_get_named_gpio(pp, "reset-gpios", 0); - if (gpio < 0) { - nfc_err(&client->dev, - "Failed to retrieve reset-gpios from device tree\n"); - return gpio; - } - - /* GPIO request and configuration */ - r = devm_gpio_request_one(&client->dev, gpio, - GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET); - if (r) { - nfc_err(&client->dev, "Failed to request reset pin\n"); - return r; - } - phy->gpio_reset = gpio; - - phy->irq_polarity = irq_get_trigger_type(client->irq); - - phy->se_status.is_ese_present = - of_property_read_bool(pp, "ese-present"); - phy->se_status.is_uicc_present = - of_property_read_bool(pp, "uicc-present"); - - return 0; -} - -static int st_nci_i2c_request_resources(struct i2c_client *client) -{ - struct st_nci_nfc_platform_data *pdata; - struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); - int r; - - pdata = client->dev.platform_data; - if (pdata == NULL) { - nfc_err(&client->dev, "No platform data\n"); - return -EINVAL; - } +static const struct acpi_gpio_params reset_gpios = { 1, 0, false }; - /* store for later use */ - phy->gpio_reset = pdata->gpio_reset; - phy->irq_polarity = pdata->irq_polarity; - - r = devm_gpio_request_one(&client->dev, - phy->gpio_reset, GPIOF_OUT_INIT_HIGH, - ST_NCI_GPIO_NAME_RESET); - if (r) { - pr_err("%s : reset gpio_request failed\n", __FILE__); - return r; - } - - phy->se_status.is_ese_present = pdata->is_ese_present; - phy->se_status.is_uicc_present = pdata->is_uicc_present; - - return 0; -} +static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = { + { "reset-gpios", &reset_gpios, 1 }, + {}, +}; static int st_nci_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { + struct device *dev = &client->dev; struct st_nci_i2c_phy *phy; - struct st_nci_nfc_platform_data *pdata; int r; dev_dbg(&client->dev, "%s\n", __func__); @@ -326,8 +225,7 @@ static int st_nci_i2c_probe(struct i2c_client *client, return -ENODEV; } - phy = devm_kzalloc(&client->dev, sizeof(struct st_nci_i2c_phy), - GFP_KERNEL); + phy = devm_kzalloc(dev, sizeof(struct st_nci_i2c_phy), GFP_KERNEL); if (!phy) return -ENOMEM; @@ -335,32 +233,22 @@ static int st_nci_i2c_probe(struct i2c_client *client, i2c_set_clientdata(client, phy); - pdata = client->dev.platform_data; - if (!pdata && client->dev.of_node) { - r = st_nci_i2c_of_request_resources(client); - if (r) { - nfc_err(&client->dev, "No platform data\n"); - return r; - } - } else if (pdata) { - r = st_nci_i2c_request_resources(client); - if (r) { - nfc_err(&client->dev, - "Cannot get platform resources\n"); - return r; - } - } else if (ACPI_HANDLE(&client->dev)) { - r = st_nci_i2c_acpi_request_resources(client); - if (r) { - nfc_err(&client->dev, "Cannot get ACPI data\n"); - return r; - } - } else { - nfc_err(&client->dev, - "st_nci platform resources not available\n"); + r = devm_acpi_dev_add_driver_gpios(dev, acpi_st_nci_gpios); + if (r) + dev_dbg(dev, "Unable to add GPIO mapping table\n"); + + /* Get RESET GPIO */ + phy->gpiod_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(phy->gpiod_reset)) { + nfc_err(dev, "Unable to get RESET GPIO\n"); return -ENODEV; } + phy->se_status.is_ese_present = + device_property_read_bool(dev, "ese-present"); + phy->se_status.is_uicc_present = + device_property_read_bool(dev, "uicc-present"); + r = ndlc_probe(phy, &i2c_phy_ops, &client->dev, ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM, &phy->ndlc, &phy->se_status); @@ -372,7 +260,7 @@ static int st_nci_i2c_probe(struct i2c_client *client, phy->irq_active = true; r = devm_request_threaded_irq(&client->dev, client->irq, NULL, st_nci_irq_thread_fn, - phy->irq_polarity | IRQF_ONESHOT, + IRQF_ONESHOT, ST_NCI_DRIVER_NAME, phy); if (r < 0) nfc_err(&client->dev, "Unable to register IRQ handler\n"); diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 89e341eba3eb..14705591b0fb 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -19,16 +19,13 @@ #include <linux/module.h> #include <linux/spi/spi.h> -#include <linux/gpio.h> #include <linux/gpio/consumer.h> -#include <linux/of_irq.h> -#include <linux/of_gpio.h> #include <linux/acpi.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/nfc.h> +#include <linux/of.h> #include <net/nfc/nci.h> -#include <linux/platform_data/st-nci.h> #include "st-nci.h" @@ -41,18 +38,16 @@ #define ST_NCI_SPI_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ #define ST_NCI_SPI_MAX_SIZE 250 /* req 4.2.1 */ +#define ST_NCI_DRIVER_NAME "st_nci" #define ST_NCI_SPI_DRIVER_NAME "st_nci_spi" -#define ST_NCI_GPIO_NAME_RESET "reset" - struct st_nci_spi_phy { struct spi_device *spi_dev; struct llt_ndlc *ndlc; bool irq_active; - unsigned int gpio_reset; - unsigned int irq_polarity; + struct gpio_desc *gpiod_reset; struct st_nci_se_status se_status; }; @@ -61,9 +56,9 @@ static int st_nci_spi_enable(void *phy_id) { struct st_nci_spi_phy *phy = phy_id; - gpio_set_value(phy->gpio_reset, 0); + gpiod_set_value(phy->gpiod_reset, 0); usleep_range(10000, 15000); - gpio_set_value(phy->gpio_reset, 1); + gpiod_set_value(phy->gpiod_reset, 1); usleep_range(80000, 85000); if (phy->ndlc->powered == 0 && phy->irq_active == 0) { @@ -223,113 +218,16 @@ static struct nfc_phy_ops spi_phy_ops = { .disable = st_nci_spi_disable, }; -static int st_nci_spi_acpi_request_resources(struct spi_device *spi_dev) -{ - struct st_nci_spi_phy *phy = spi_get_drvdata(spi_dev); - struct gpio_desc *gpiod_reset; - struct device *dev = &spi_dev->dev; - u8 tmp; - - /* Get RESET GPIO from ACPI */ - gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1, - GPIOD_OUT_HIGH); - if (IS_ERR(gpiod_reset)) { - nfc_err(dev, "Unable to get RESET GPIO\n"); - return -ENODEV; - } - - phy->gpio_reset = desc_to_gpio(gpiod_reset); - - phy->irq_polarity = irq_get_trigger_type(spi_dev->irq); - - phy->se_status.is_ese_present = false; - phy->se_status.is_uicc_present = false; - - if (device_property_present(dev, "ese-present")) { - device_property_read_u8(dev, "ese-present", &tmp); - tmp = phy->se_status.is_ese_present; - } - - if (device_property_present(dev, "uicc-present")) { - device_property_read_u8(dev, "uicc-present", &tmp); - tmp = phy->se_status.is_uicc_present; - } - - return 0; -} - -static int st_nci_spi_of_request_resources(struct spi_device *dev) -{ - struct st_nci_spi_phy *phy = spi_get_drvdata(dev); - struct device_node *pp; - int gpio; - int r; - - pp = dev->dev.of_node; - if (!pp) - return -ENODEV; - - /* Get GPIO from device tree */ - gpio = of_get_named_gpio(pp, "reset-gpios", 0); - if (gpio < 0) { - nfc_err(&dev->dev, - "Failed to retrieve reset-gpios from device tree\n"); - return gpio; - } - - /* GPIO request and configuration */ - r = devm_gpio_request_one(&dev->dev, gpio, - GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET); - if (r) { - nfc_err(&dev->dev, "Failed to request reset pin\n"); - return r; - } - phy->gpio_reset = gpio; - - phy->irq_polarity = irq_get_trigger_type(dev->irq); +static const struct acpi_gpio_params reset_gpios = { 1, 0, false }; - phy->se_status.is_ese_present = - of_property_read_bool(pp, "ese-present"); - phy->se_status.is_uicc_present = - of_property_read_bool(pp, "uicc-present"); - - return 0; -} - -static int st_nci_spi_request_resources(struct spi_device *dev) -{ - struct st_nci_nfc_platform_data *pdata; - struct st_nci_spi_phy *phy = spi_get_drvdata(dev); - int r; - - pdata = dev->dev.platform_data; - if (pdata == NULL) { - nfc_err(&dev->dev, "No platform data\n"); - return -EINVAL; - } - - /* store for later use */ - phy->gpio_reset = pdata->gpio_reset; - phy->irq_polarity = pdata->irq_polarity; - - r = devm_gpio_request_one(&dev->dev, - phy->gpio_reset, GPIOF_OUT_INIT_HIGH, - ST_NCI_GPIO_NAME_RESET); - if (r) { - pr_err("%s : reset gpio_request failed\n", __FILE__); - return r; - } - - phy->se_status.is_ese_present = pdata->is_ese_present; - phy->se_status.is_uicc_present = pdata->is_uicc_present; - - return 0; -} +static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = { + { "reset-gpios", &reset_gpios, 1 }, + {}, +}; static int st_nci_spi_probe(struct spi_device *dev) { struct st_nci_spi_phy *phy; - struct st_nci_nfc_platform_data *pdata; int r; dev_dbg(&dev->dev, "%s\n", __func__); @@ -351,32 +249,22 @@ static int st_nci_spi_probe(struct spi_device *dev) spi_set_drvdata(dev, phy); - pdata = dev->dev.platform_data; - if (!pdata && dev->dev.of_node) { - r = st_nci_spi_of_request_resources(dev); - if (r) { - nfc_err(&dev->dev, "No platform data\n"); - return r; - } - } else if (pdata) { - r = st_nci_spi_request_resources(dev); - if (r) { - nfc_err(&dev->dev, - "Cannot get platform resources\n"); - return r; - } - } else if (ACPI_HANDLE(&dev->dev)) { - r = st_nci_spi_acpi_request_resources(dev); - if (r) { - nfc_err(&dev->dev, "Cannot get ACPI data\n"); - return r; - } - } else { - nfc_err(&dev->dev, - "st_nci platform resources not available\n"); - return -ENODEV; + r = devm_acpi_dev_add_driver_gpios(&dev->dev, acpi_st_nci_gpios); + if (r) + dev_dbg(&dev->dev, "Unable to add GPIO mapping table\n"); + + /* Get RESET GPIO */ + phy->gpiod_reset = devm_gpiod_get(&dev->dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(phy->gpiod_reset)) { + nfc_err(&dev->dev, "Unable to get RESET GPIO\n"); + return PTR_ERR(phy->gpiod_reset); } + phy->se_status.is_ese_present = + device_property_read_bool(&dev->dev, "ese-present"); + phy->se_status.is_uicc_present = + device_property_read_bool(&dev->dev, "uicc-present"); + r = ndlc_probe(phy, &spi_phy_ops, &dev->dev, ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM, &phy->ndlc, &phy->se_status); @@ -388,7 +276,7 @@ static int st_nci_spi_probe(struct spi_device *dev) phy->irq_active = true; r = devm_request_threaded_irq(&dev->dev, dev->irq, NULL, st_nci_irq_thread_fn, - phy->irq_polarity | IRQF_ONESHOT, + IRQF_ONESHOT, ST_NCI_SPI_DRIVER_NAME, phy); if (r < 0) nfc_err(&dev->dev, "Unable to register IRQ handler\n"); diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 4bff76baa341..cd1f7bfa75eb 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -61,8 +61,6 @@ #define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci" #define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c" -#define ST21NFCA_GPIO_NAME_EN "enable" - struct st21nfca_i2c_phy { struct i2c_client *i2c_dev; struct nfc_hci_dev *hdev; @@ -501,41 +499,17 @@ static struct nfc_phy_ops i2c_phy_ops = { .disable = st21nfca_hci_i2c_disable, }; -static int st21nfca_hci_i2c_acpi_request_resources(struct i2c_client *client) -{ - struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client); - struct device *dev = &client->dev; - - /* Get EN GPIO from ACPI */ - phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 1, - GPIOD_OUT_LOW); - if (IS_ERR(phy->gpiod_ena)) { - nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); - } - - return 0; -} - -static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client) -{ - struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client); - struct device *dev = &client->dev; - - /* Get GPIO from device tree */ - phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 0, - GPIOD_OUT_HIGH); - if (IS_ERR(phy->gpiod_ena)) { - nfc_err(dev, "Failed to request enable pin\n"); - return PTR_ERR(phy->gpiod_ena); - } +static const struct acpi_gpio_params enable_gpios = { 1, 0, false }; - return 0; -} +static const struct acpi_gpio_mapping acpi_st21nfca_gpios[] = { + { "enable-gpios", &enable_gpios, 1 }, + {}, +}; static int st21nfca_hci_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { + struct device *dev = &client->dev; struct st21nfca_i2c_phy *phy; int r; @@ -562,21 +536,15 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, mutex_init(&phy->phy_lock); i2c_set_clientdata(client, phy); - if (client->dev.of_node) { - r = st21nfca_hci_i2c_of_request_resources(client); - if (r) { - nfc_err(&client->dev, "No platform data\n"); - return r; - } - } else if (ACPI_HANDLE(&client->dev)) { - r = st21nfca_hci_i2c_acpi_request_resources(client); - if (r) { - nfc_err(&client->dev, "Cannot get ACPI data\n"); - return r; - } - } else { - nfc_err(&client->dev, "st21nfca platform resources not available\n"); - return -ENODEV; + r = devm_acpi_dev_add_driver_gpios(dev, acpi_st21nfca_gpios); + if (r) + dev_dbg(dev, "Unable to add GPIO mapping table\n"); + + /* Get EN GPIO from resource provider */ + phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); + if (IS_ERR(phy->gpiod_ena)) { + nfc_err(dev, "Unable to get ENABLE GPIO\n"); + return PTR_ERR(phy->gpiod_ena); } phy->se_status.is_ese_present = diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 2d1c8ca6e679..eee5cc1a9220 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -20,9 +20,8 @@ #include <linux/nfc.h> #include <linux/skbuff.h> #include <linux/delay.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/spi/spi.h> #include <linux/regulator/consumer.h> @@ -123,11 +122,10 @@ NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_FELICA_MASK | \ NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK) -#define TRF7970A_AUTOSUSPEND_DELAY 30000 /* 30 seconds */ +#define TRF7970A_AUTOSUSPEND_DELAY 30000 /* 30 seconds */ #define TRF7970A_13MHZ_CLOCK_FREQUENCY 13560000 #define TRF7970A_27MHZ_CLOCK_FREQUENCY 27120000 - #define TRF7970A_RX_SKB_ALLOC_SIZE 256 #define TRF7970A_FIFO_SIZE 127 @@ -152,7 +150,6 @@ */ #define TRF7970A_QUIRK_IRQ_STATUS_READ BIT(0) #define TRF7970A_QUIRK_EN2_MUST_STAY_LOW BIT(1) -#define TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE BIT(2) /* Direct commands */ #define TRF7970A_CMD_IDLE 0x00 @@ -295,7 +292,7 @@ #define TRF7970A_REG_IO_CTRL_AUTO_REG BIT(7) /* IRQ Status Register Bits */ -#define TRF7970A_IRQ_STATUS_NORESP BIT(0) /* ISO15693 only */ +#define TRF7970A_IRQ_STATUS_NORESP BIT(0) /* ISO15693 only */ #define TRF7970A_IRQ_STATUS_NFC_COL_ERROR BIT(0) #define TRF7970A_IRQ_STATUS_COL BIT(1) #define TRF7970A_IRQ_STATUS_FRAMING_EOF_ERROR BIT(2) @@ -451,16 +448,14 @@ struct trf7970a { u8 md_rf_tech; u8 tx_cmd; bool issue_eof; - bool adjust_resp_len; - int en2_gpio; - int en_gpio; + struct gpio_desc *en_gpiod; + struct gpio_desc *en2_gpiod; struct mutex lock; unsigned int timeout; bool ignore_timeout; struct delayed_work timeout_work; }; - static int trf7970a_cmd(struct trf7970a *trf, u8 opcode) { u8 cmd = TRF7970A_CMD_BIT_CTRL | TRF7970A_CMD_BIT_OPCODE(opcode); @@ -471,7 +466,7 @@ static int trf7970a_cmd(struct trf7970a *trf, u8 opcode) ret = spi_write(trf->spi, &cmd, 1); if (ret) dev_err(trf->dev, "%s - cmd: 0x%x, ret: %d\n", __func__, cmd, - ret); + ret); return ret; } @@ -483,14 +478,15 @@ static int trf7970a_read(struct trf7970a *trf, u8 reg, u8 *val) ret = spi_write_then_read(trf->spi, &addr, 1, val, 1); if (ret) dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr, - ret); + ret); dev_dbg(trf->dev, "read(0x%x): 0x%x\n", addr, *val); return ret; } -static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len) +static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, + size_t len) { u8 addr = reg | TRF7970A_CMD_BIT_RW | TRF7970A_CMD_BIT_CONTINUOUS; struct spi_transfer t[2]; @@ -514,7 +510,7 @@ static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len) ret = spi_sync(trf->spi, &m); if (ret) dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr, - ret); + ret); return ret; } @@ -528,7 +524,7 @@ static int trf7970a_write(struct trf7970a *trf, u8 reg, u8 val) ret = spi_write(trf->spi, buf, 2); if (ret) dev_err(trf->dev, "%s - write: 0x%x 0x%x, ret: %d\n", __func__, - buf[0], buf[1], ret); + buf[0], buf[1], ret); return ret; } @@ -550,7 +546,7 @@ static int trf7970a_read_irqstatus(struct trf7970a *trf, u8 *status) if (ret) dev_err(trf->dev, "%s - irqstatus: Status read failed: %d\n", - __func__, ret); + __func__, ret); else *status = buf[0]; @@ -564,12 +560,12 @@ static int trf7970a_read_target_proto(struct trf7970a *trf, u8 *target_proto) u8 addr; addr = TRF79070A_NFC_TARGET_PROTOCOL | TRF7970A_CMD_BIT_RW | - TRF7970A_CMD_BIT_CONTINUOUS; + TRF7970A_CMD_BIT_CONTINUOUS; ret = spi_write_then_read(trf->spi, &addr, 1, buf, 2); if (ret) dev_err(trf->dev, "%s - target_proto: Read failed: %d\n", - __func__, ret); + __func__, ret); else *target_proto = buf[0]; @@ -600,7 +596,7 @@ static int trf7970a_mode_detect(struct trf7970a *trf, u8 *rf_tech) break; default: dev_dbg(trf->dev, "%s - mode_detect: target_proto: 0x%x\n", - __func__, target_proto); + __func__, target_proto); return -EIO; } @@ -616,8 +612,8 @@ static void trf7970a_send_upstream(struct trf7970a *trf) if (trf->rx_skb && !IS_ERR(trf->rx_skb) && !trf->aborting) print_hex_dump_debug("trf7970a rx data: ", DUMP_PREFIX_NONE, - 16, 1, trf->rx_skb->data, trf->rx_skb->len, - false); + 16, 1, trf->rx_skb->data, trf->rx_skb->len, + false); trf->state = TRF7970A_ST_IDLE; @@ -632,13 +628,6 @@ static void trf7970a_send_upstream(struct trf7970a *trf) trf->aborting = false; } - if (trf->adjust_resp_len) { - if (trf->rx_skb) - skb_trim(trf->rx_skb, trf->rx_skb->len - 1); - - trf->adjust_resp_len = false; - } - trf->cb(trf->ddev, trf->cb_arg, trf->rx_skb); trf->rx_skb = NULL; @@ -657,7 +646,8 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno) } static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb, - unsigned int len, u8 *prefix, unsigned int prefix_len) + unsigned int len, u8 *prefix, + unsigned int prefix_len) { struct spi_transfer t[2]; struct spi_message m; @@ -665,7 +655,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb, int ret; print_hex_dump_debug("trf7970a tx data: ", DUMP_PREFIX_NONE, - 16, 1, skb->data, len, false); + 16, 1, skb->data, len, false); spi_message_init(&m); @@ -682,7 +672,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb, ret = spi_sync(trf->spi, &m); if (ret) { dev_err(trf->dev, "%s - Can't send tx data: %d\n", __func__, - ret); + ret); return ret; } @@ -706,7 +696,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb, } dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n", timeout, - trf->state); + trf->state); schedule_delayed_work(&trf->timeout_work, msecs_to_jiffies(timeout)); @@ -774,9 +764,9 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status) if (fifo_bytes > skb_tailroom(skb)) { skb = skb_copy_expand(skb, skb_headroom(skb), - max_t(int, fifo_bytes, - TRF7970A_RX_SKB_ALLOC_SIZE), - GFP_KERNEL); + max_t(int, fifo_bytes, + TRF7970A_RX_SKB_ALLOC_SIZE), + GFP_KERNEL); if (!skb) { trf7970a_send_err_upstream(trf, -ENOMEM); return; @@ -787,7 +777,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status) } ret = trf7970a_read_cont(trf, TRF7970A_FIFO_IO_REGISTER, - skb_put(skb, fifo_bytes), fifo_bytes); + skb_put(skb, fifo_bytes), fifo_bytes); if (ret) { trf7970a_send_err_upstream(trf, ret); return; @@ -795,8 +785,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status) /* If received Type 2 ACK/NACK, shift right 4 bits and pass up */ if ((trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T) && (skb->len == 1) && - (trf->special_fcn_reg1 == - TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) { + (trf->special_fcn_reg1 == TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) { skb->data[0] >>= 4; status = TRF7970A_IRQ_STATUS_SRX; } else { @@ -819,16 +808,16 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status) } no_rx_data: - if (status == TRF7970A_IRQ_STATUS_SRX) { /* Receive complete */ + if (status == TRF7970A_IRQ_STATUS_SRX) { /* Receive complete */ trf7970a_send_upstream(trf); return; } dev_dbg(trf->dev, "Setting timeout for %d ms\n", - TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT); + TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT); schedule_delayed_work(&trf->timeout_work, - msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT)); + msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT)); } static irqreturn_t trf7970a_irq(int irq, void *dev_id) @@ -851,7 +840,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) } dev_dbg(trf->dev, "IRQ - state: %d, status: 0x%x\n", trf->state, - status); + status); if (!status) { mutex_unlock(&trf->lock); @@ -876,7 +865,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) case TRF7970A_ST_WAIT_FOR_TX_FIFO: if (status & TRF7970A_IRQ_STATUS_TX) { trf->ignore_timeout = - !cancel_delayed_work(&trf->timeout_work); + !cancel_delayed_work(&trf->timeout_work); trf7970a_fill_fifo(trf); } else { trf7970a_send_err_upstream(trf, -EIO); @@ -886,11 +875,11 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) case TRF7970A_ST_WAIT_FOR_RX_DATA_CONT: if (status & TRF7970A_IRQ_STATUS_SRX) { trf->ignore_timeout = - !cancel_delayed_work(&trf->timeout_work); + !cancel_delayed_work(&trf->timeout_work); trf7970a_drain_fifo(trf, status); } else if (status & TRF7970A_IRQ_STATUS_FIFO) { ret = trf7970a_read(trf, TRF7970A_FIFO_STATUS, - &fifo_bytes); + &fifo_bytes); fifo_bytes &= ~TRF7970A_FIFO_STATUS_OVERFLOW; @@ -899,14 +888,14 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) else if (!fifo_bytes) trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET); } else if ((status == TRF7970A_IRQ_STATUS_TX) || - (!trf->is_initiator && - (status == (TRF7970A_IRQ_STATUS_TX | - TRF7970A_IRQ_STATUS_NFC_RF)))) { + (!trf->is_initiator && + (status == (TRF7970A_IRQ_STATUS_TX | + TRF7970A_IRQ_STATUS_NFC_RF)))) { trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET); if (!trf->timeout) { - trf->ignore_timeout = !cancel_delayed_work( - &trf->timeout_work); + trf->ignore_timeout = + !cancel_delayed_work(&trf->timeout_work); trf->rx_skb = ERR_PTR(0); trf7970a_send_upstream(trf); break; @@ -930,13 +919,13 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) break; case NFC_DIGITAL_FRAMING_NFCA_ANTICOL_COMPLETE: ret = trf7970a_write(trf, - TRF7970A_SPECIAL_FCN_REG1, - TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL); + TRF7970A_SPECIAL_FCN_REG1, + TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL); if (ret) goto err_unlock_exit; trf->special_fcn_reg1 = - TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL; + TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL; break; default: break; @@ -944,7 +933,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) if (iso_ctrl != trf->iso_ctrl) { ret = trf7970a_write(trf, TRF7970A_ISO_CTRL, - iso_ctrl); + iso_ctrl); if (ret) goto err_unlock_exit; @@ -961,7 +950,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) case TRF7970A_ST_LISTENING: if (status & TRF7970A_IRQ_STATUS_SRX) { trf->ignore_timeout = - !cancel_delayed_work(&trf->timeout_work); + !cancel_delayed_work(&trf->timeout_work); trf7970a_drain_fifo(trf, status); } else if (!(status & TRF7970A_IRQ_STATUS_NFC_RF)) { trf7970a_send_err_upstream(trf, -EIO); @@ -970,7 +959,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) case TRF7970A_ST_LISTENING_MD: if (status & TRF7970A_IRQ_STATUS_SRX) { trf->ignore_timeout = - !cancel_delayed_work(&trf->timeout_work); + !cancel_delayed_work(&trf->timeout_work); ret = trf7970a_mode_detect(trf, &trf->md_rf_tech); if (ret) { @@ -985,7 +974,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id) break; default: dev_err(trf->dev, "%s - Driver in invalid state: %d\n", - __func__, trf->state); + __func__, trf->state); } err_unlock_exit: @@ -1010,19 +999,19 @@ static void trf7970a_issue_eof(struct trf7970a *trf) trf->state = TRF7970A_ST_WAIT_FOR_RX_DATA; dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n", - trf->timeout, trf->state); + trf->timeout, trf->state); schedule_delayed_work(&trf->timeout_work, - msecs_to_jiffies(trf->timeout)); + msecs_to_jiffies(trf->timeout)); } static void trf7970a_timeout_work_handler(struct work_struct *work) { struct trf7970a *trf = container_of(work, struct trf7970a, - timeout_work.work); + timeout_work.work); dev_dbg(trf->dev, "Timeout - state: %d, ignore_timeout: %d\n", - trf->state, trf->ignore_timeout); + trf->state, trf->ignore_timeout); mutex_lock(&trf->lock); @@ -1053,7 +1042,7 @@ static int trf7970a_init(struct trf7970a *trf) goto err_out; ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL, - trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1)); + trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1)); if (ret) goto err_out; @@ -1066,13 +1055,13 @@ static int trf7970a_init(struct trf7970a *trf) trf->chip_status_ctrl &= ~TRF7970A_CHIP_STATUS_RF_ON; ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL, - trf->modulator_sys_clk_ctrl); + trf->modulator_sys_clk_ctrl); if (ret) goto err_out; ret = trf7970a_write(trf, TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS, - TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 | - TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32); + TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 | + TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32); if (ret) goto err_out; @@ -1093,7 +1082,7 @@ err_out: static void trf7970a_switch_rf_off(struct trf7970a *trf) { if ((trf->state == TRF7970A_ST_PWR_OFF) || - (trf->state == TRF7970A_ST_RF_OFF)) + (trf->state == TRF7970A_ST_RF_OFF)) return; dev_dbg(trf->dev, "Switching rf off\n"); @@ -1117,9 +1106,9 @@ static int trf7970a_switch_rf_on(struct trf7970a *trf) pm_runtime_get_sync(trf->dev); - if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */ + if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */ dev_err(trf->dev, "%s - Incorrect state: %d\n", __func__, - trf->state); + trf->state); return -EINVAL; } @@ -1154,7 +1143,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on) break; default: dev_err(trf->dev, "%s - Invalid request: %d %d\n", - __func__, trf->state, on); + __func__, trf->state, on); trf7970a_switch_rf_off(trf); ret = -EINVAL; } @@ -1165,7 +1154,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on) break; default: dev_err(trf->dev, "%s - Invalid request: %d %d\n", - __func__, trf->state, on); + __func__, trf->state, on); ret = -EINVAL; /* FALLTHROUGH */ case TRF7970A_ST_IDLE: @@ -1190,36 +1179,36 @@ static int trf7970a_in_config_rf_tech(struct trf7970a *trf, int tech) case NFC_DIGITAL_RF_TECH_106A: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443A_106; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_OOK; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_OOK; trf->guard_time = TRF7970A_GUARD_TIME_NFCA; break; case NFC_DIGITAL_RF_TECH_106B: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443B_106; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_ASK10; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_ASK10; trf->guard_time = TRF7970A_GUARD_TIME_NFCB; break; case NFC_DIGITAL_RF_TECH_212F: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_212; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_ASK10; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_ASK10; trf->guard_time = TRF7970A_GUARD_TIME_NFCF; break; case NFC_DIGITAL_RF_TECH_424F: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_424; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_ASK10; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_ASK10; trf->guard_time = TRF7970A_GUARD_TIME_NFCF; break; case NFC_DIGITAL_RF_TECH_ISO15693: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_OOK; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_OOK; trf->guard_time = TRF7970A_GUARD_TIME_15693; break; default: @@ -1246,7 +1235,8 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field) u8 rssi; ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL, - trf->chip_status_ctrl | TRF7970A_CHIP_STATUS_REC_ON); + trf->chip_status_ctrl | + TRF7970A_CHIP_STATUS_REC_ON); if (ret) return ret; @@ -1261,7 +1251,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field) return ret; ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL, - trf->chip_status_ctrl); + trf->chip_status_ctrl); if (ret) return ret; @@ -1328,15 +1318,15 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing) trf->iso_ctrl = iso_ctrl; ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL, - trf->modulator_sys_clk_ctrl); + trf->modulator_sys_clk_ctrl); if (ret) return ret; } if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) { ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL, - trf->chip_status_ctrl | - TRF7970A_CHIP_STATUS_RF_ON); + trf->chip_status_ctrl | + TRF7970A_CHIP_STATUS_RF_ON); if (ret) return ret; @@ -1349,7 +1339,7 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing) } static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type, - int param) + int param) { struct trf7970a *trf = nfc_digital_get_drvdata(ddev); int ret; @@ -1361,7 +1351,7 @@ static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type, trf->is_initiator = true; if ((trf->state == TRF7970A_ST_PWR_OFF) || - (trf->state == TRF7970A_ST_RF_OFF)) { + (trf->state == TRF7970A_ST_RF_OFF)) { ret = trf7970a_switch_rf_on(trf); if (ret) goto err_unlock; @@ -1419,7 +1409,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb) * has to send an EOF in order to get a response. */ if ((trf->technology == NFC_DIGITAL_RF_TECH_106A) && - (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) { + (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) { if (req[0] == NFC_T2T_CMD_READ) special_fcn_reg1 = 0; else @@ -1427,7 +1417,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb) if (special_fcn_reg1 != trf->special_fcn_reg1) { ret = trf7970a_write(trf, TRF7970A_SPECIAL_FCN_REG1, - special_fcn_reg1); + special_fcn_reg1); if (ret) return ret; @@ -1447,7 +1437,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb) iso_ctrl |= TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648; break; case (ISO15693_REQ_FLAG_SUB_CARRIER | - ISO15693_REQ_FLAG_DATA_RATE): + ISO15693_REQ_FLAG_DATA_RATE): iso_ctrl |= TRF7970A_ISO_CTRL_15693_DBL_1OF4_2669; break; } @@ -1460,23 +1450,18 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb) trf->iso_ctrl = iso_ctrl; } - if (trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) { - if (trf7970a_is_iso15693_write_or_lock(req[1]) && - (req[0] & ISO15693_REQ_FLAG_OPTION)) - trf->issue_eof = true; - else if ((trf->quirks & - TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE) && - (req[1] == ISO15693_CMD_READ_MULTIPLE_BLOCK)) - trf->adjust_resp_len = true; - } + if ((trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) && + trf7970a_is_iso15693_write_or_lock(req[1]) && + (req[0] & ISO15693_REQ_FLAG_OPTION)) + trf->issue_eof = true; } return 0; } static int trf7970a_send_cmd(struct nfc_digital_dev *ddev, - struct sk_buff *skb, u16 timeout, - nfc_digital_cmd_complete_t cb, void *arg) + struct sk_buff *skb, u16 timeout, + nfc_digital_cmd_complete_t cb, void *arg) { struct trf7970a *trf = nfc_digital_get_drvdata(ddev); u8 prefix[5]; @@ -1485,7 +1470,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev, u8 status; dev_dbg(trf->dev, "New request - state: %d, timeout: %d ms, len: %d\n", - trf->state, timeout, skb->len); + trf->state, timeout, skb->len); if (skb->len > TRF7970A_TX_MAX) return -EINVAL; @@ -1493,9 +1478,9 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev, mutex_lock(&trf->lock); if ((trf->state != TRF7970A_ST_IDLE) && - (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) { + (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) { dev_err(trf->dev, "%s - Bogus state: %d\n", __func__, - trf->state); + trf->state); ret = -EIO; goto out_err; } @@ -1509,7 +1494,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev, if (timeout) { trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE, - GFP_KERNEL); + GFP_KERNEL); if (!trf->rx_skb) { dev_dbg(trf->dev, "Can't alloc rx_skb\n"); ret = -ENOMEM; @@ -1546,14 +1531,14 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev, * That totals 5 bytes. */ prefix[0] = TRF7970A_CMD_BIT_CTRL | - TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET); + TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET); prefix[1] = TRF7970A_CMD_BIT_CTRL | - TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd); + TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd); prefix[2] = TRF7970A_CMD_BIT_CONTINUOUS | TRF7970A_TX_LENGTH_BYTE1; if (trf->framing == NFC_DIGITAL_FRAMING_NFCA_SHORT) { prefix[3] = 0x00; - prefix[4] = 0x0f; /* 7 bits */ + prefix[4] = 0x0f; /* 7 bits */ } else { prefix[3] = (len & 0xf00) >> 4; prefix[3] |= ((len & 0xf0) >> 4); @@ -1587,25 +1572,24 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech) switch (tech) { case NFC_DIGITAL_RF_TECH_106A: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE | - TRF7970A_ISO_CTRL_NFC_CE | - TRF7970A_ISO_CTRL_NFC_CE_14443A; + TRF7970A_ISO_CTRL_NFC_CE | TRF7970A_ISO_CTRL_NFC_CE_14443A; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_OOK; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_OOK; break; case NFC_DIGITAL_RF_TECH_212F: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE | - TRF7970A_ISO_CTRL_NFC_NFCF_212; + TRF7970A_ISO_CTRL_NFC_NFCF_212; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_ASK10; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_ASK10; break; case NFC_DIGITAL_RF_TECH_424F: trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE | - TRF7970A_ISO_CTRL_NFC_NFCF_424; + TRF7970A_ISO_CTRL_NFC_NFCF_424; trf->modulator_sys_clk_ctrl = - (trf->modulator_sys_clk_ctrl & 0xf8) | - TRF7970A_MODULATOR_DEPTH_ASK10; + (trf->modulator_sys_clk_ctrl & 0xf8) | + TRF7970A_MODULATOR_DEPTH_ASK10; break; default: dev_dbg(trf->dev, "Unsupported rf technology: %d\n", tech); @@ -1622,9 +1606,9 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech) * here. */ if ((trf->framing == NFC_DIGITAL_FRAMING_NFC_DEP_ACTIVATED) && - (trf->iso_ctrl_tech != trf->iso_ctrl)) { + (trf->iso_ctrl_tech != trf->iso_ctrl)) { ret = trf7970a_write(trf, TRF7970A_ISO_CTRL, - trf->iso_ctrl_tech); + trf->iso_ctrl_tech); trf->iso_ctrl = trf->iso_ctrl_tech; } @@ -1679,15 +1663,15 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing) trf->iso_ctrl = iso_ctrl; ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL, - trf->modulator_sys_clk_ctrl); + trf->modulator_sys_clk_ctrl); if (ret) return ret; } if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) { ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL, - trf->chip_status_ctrl | - TRF7970A_CHIP_STATUS_RF_ON); + trf->chip_status_ctrl | + TRF7970A_CHIP_STATUS_RF_ON); if (ret) return ret; @@ -1698,7 +1682,7 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing) } static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type, - int param) + int param) { struct trf7970a *trf = nfc_digital_get_drvdata(ddev); int ret; @@ -1710,7 +1694,7 @@ static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type, trf->is_initiator = false; if ((trf->state == TRF7970A_ST_PWR_OFF) || - (trf->state == TRF7970A_ST_RF_OFF)) { + (trf->state == TRF7970A_ST_RF_OFF)) { ret = trf7970a_switch_rf_on(trf); if (ret) goto err_unlock; @@ -1734,7 +1718,8 @@ err_unlock: } static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, - nfc_digital_cmd_complete_t cb, void *arg, bool mode_detect) + nfc_digital_cmd_complete_t cb, void *arg, + bool mode_detect) { struct trf7970a *trf = nfc_digital_get_drvdata(ddev); int ret; @@ -1742,9 +1727,9 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, mutex_lock(&trf->lock); if ((trf->state != TRF7970A_ST_IDLE) && - (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) { + (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) { dev_err(trf->dev, "%s - Bogus state: %d\n", __func__, - trf->state); + trf->state); ret = -EIO; goto out_err; } @@ -1757,7 +1742,7 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, } trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE, - GFP_KERNEL); + GFP_KERNEL); if (!trf->rx_skb) { dev_dbg(trf->dev, "Can't alloc rx_skb\n"); ret = -ENOMEM; @@ -1765,25 +1750,25 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, } ret = trf7970a_write(trf, TRF7970A_RX_SPECIAL_SETTINGS, - TRF7970A_RX_SPECIAL_SETTINGS_HBT | - TRF7970A_RX_SPECIAL_SETTINGS_M848 | - TRF7970A_RX_SPECIAL_SETTINGS_C424 | - TRF7970A_RX_SPECIAL_SETTINGS_C212); + TRF7970A_RX_SPECIAL_SETTINGS_HBT | + TRF7970A_RX_SPECIAL_SETTINGS_M848 | + TRF7970A_RX_SPECIAL_SETTINGS_C424 | + TRF7970A_RX_SPECIAL_SETTINGS_C212); if (ret) goto out_err; ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL, - trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1)); + trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1)); if (ret) goto out_err; ret = trf7970a_write(trf, TRF7970A_NFC_LOW_FIELD_LEVEL, - TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3)); + TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3)); if (ret) goto out_err; ret = trf7970a_write(trf, TRF7970A_NFC_TARGET_LEVEL, - TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7)); + TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7)); if (ret) goto out_err; @@ -1808,32 +1793,33 @@ out_err: } static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout, - nfc_digital_cmd_complete_t cb, void *arg) + nfc_digital_cmd_complete_t cb, void *arg) { struct trf7970a *trf = nfc_digital_get_drvdata(ddev); dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n", - trf->state, timeout); + trf->state, timeout); return _trf7970a_tg_listen(ddev, timeout, cb, arg, false); } static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev, - u16 timeout, nfc_digital_cmd_complete_t cb, void *arg) + u16 timeout, nfc_digital_cmd_complete_t cb, + void *arg) { struct trf7970a *trf = nfc_digital_get_drvdata(ddev); int ret; dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n", - trf->state, timeout); + trf->state, timeout); ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH, - NFC_DIGITAL_RF_TECH_106A); + NFC_DIGITAL_RF_TECH_106A); if (ret) return ret; ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, - NFC_DIGITAL_FRAMING_NFCA_NFC_DEP); + NFC_DIGITAL_FRAMING_NFCA_NFC_DEP); if (ret) return ret; @@ -1845,7 +1831,7 @@ static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech) struct trf7970a *trf = nfc_digital_get_drvdata(ddev); dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n", - trf->state, trf->md_rf_tech); + trf->state, trf->md_rf_tech); *rf_tech = trf->md_rf_tech; @@ -1908,14 +1894,13 @@ static int trf7970a_power_up(struct trf7970a *trf) usleep_range(5000, 6000); - if (!(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) { - if (gpio_is_valid(trf->en2_gpio)) { - gpio_set_value(trf->en2_gpio, 1); - usleep_range(1000, 2000); - } + if (trf->en2_gpiod && + !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) { + gpiod_set_value_cansleep(trf->en2_gpiod, 1); + usleep_range(1000, 2000); } - gpio_set_value(trf->en_gpio, 1); + gpiod_set_value_cansleep(trf->en_gpiod, 1); usleep_range(20000, 21000); @@ -1935,18 +1920,19 @@ static int trf7970a_power_down(struct trf7970a *trf) if (trf->state != TRF7970A_ST_RF_OFF) { dev_dbg(trf->dev, "Can't power down - not RF_OFF state (%d)\n", - trf->state); + trf->state); return -EBUSY; } - gpio_set_value(trf->en_gpio, 0); - if (gpio_is_valid(trf->en2_gpio)) - gpio_set_value(trf->en2_gpio, 0); + gpiod_set_value_cansleep(trf->en_gpiod, 0); + + if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) + gpiod_set_value_cansleep(trf->en2_gpiod, 0); ret = regulator_disable(trf->regulator); if (ret) dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__, - ret); + ret); trf->state = TRF7970A_ST_PWR_OFF; @@ -2003,12 +1989,6 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np) return autosuspend_delay; } -static int trf7970a_get_vin_voltage_override(struct device_node *np, - u32 *vin_uvolts) -{ - return of_property_read_u32(np, "vin-voltage-override", vin_uvolts); -} - static int trf7970a_probe(struct spi_device *spi) { struct device_node *np = spi->dev.of_node; @@ -2038,53 +2018,48 @@ static int trf7970a_probe(struct spi_device *spi) return ret; } - if (of_property_read_bool(np, "t5t-rmb-extra-byte-quirk")) - trf->quirks |= TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE; - if (of_property_read_bool(np, "irq-status-read-quirk")) trf->quirks |= TRF7970A_QUIRK_IRQ_STATUS_READ; - /* There are two enable pins - both must be present */ - trf->en_gpio = of_get_named_gpio(np, "ti,enable-gpios", 0); - if (!gpio_is_valid(trf->en_gpio)) { + /* There are two enable pins - only EN must be present in the DT */ + trf->en_gpiod = devm_gpiod_get_index(trf->dev, "ti,enable", 0, + GPIOD_OUT_LOW); + if (IS_ERR(trf->en_gpiod)) { dev_err(trf->dev, "No EN GPIO property\n"); - return trf->en_gpio; + return PTR_ERR(trf->en_gpiod); } - ret = devm_gpio_request_one(trf->dev, trf->en_gpio, - GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN"); - if (ret) { - dev_err(trf->dev, "Can't request EN GPIO: %d\n", ret); - return ret; - } - - trf->en2_gpio = of_get_named_gpio(np, "ti,enable-gpios", 1); - if (!gpio_is_valid(trf->en2_gpio)) { + trf->en2_gpiod = devm_gpiod_get_index_optional(trf->dev, "ti,enable", 1, + GPIOD_OUT_LOW); + if (!trf->en2_gpiod) { dev_info(trf->dev, "No EN2 GPIO property\n"); - } else { - ret = devm_gpio_request_one(trf->dev, trf->en2_gpio, - GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN2"); - if (ret) { - dev_err(trf->dev, "Can't request EN2 GPIO: %d\n", ret); - return ret; - } + } else if (IS_ERR(trf->en2_gpiod)) { + dev_err(trf->dev, "Error getting EN2 GPIO property: %ld\n", + PTR_ERR(trf->en2_gpiod)); + return PTR_ERR(trf->en2_gpiod); + } else if (of_property_read_bool(np, "en2-rf-quirk")) { + trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW; } of_property_read_u32(np, "clock-frequency", &clk_freq); - if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) || - (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) { + if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) && + (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) { dev_err(trf->dev, - "clock-frequency (%u Hz) unsupported\n", - clk_freq); + "clock-frequency (%u Hz) unsupported\n", clk_freq); return -EINVAL; } - if (of_property_read_bool(np, "en2-rf-quirk")) - trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW; + if (clk_freq == TRF7970A_27MHZ_CLOCK_FREQUENCY) { + trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_27MHZ; + dev_dbg(trf->dev, "trf7970a configured for 27MHz crystal\n"); + } else { + trf->modulator_sys_clk_ctrl = 0; + } ret = devm_request_threaded_irq(trf->dev, spi->irq, NULL, - trf7970a_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT, - "trf7970a", trf); + trf7970a_irq, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + "trf7970a", trf); if (ret) { dev_err(trf->dev, "Can't request IRQ#%d: %d\n", spi->irq, ret); return ret; @@ -2106,10 +2081,7 @@ static int trf7970a_probe(struct spi_device *spi) goto err_destroy_lock; } - ret = trf7970a_get_vin_voltage_override(np, &uvolts); - if (ret) - uvolts = regulator_get_voltage(trf->regulator); - + uvolts = regulator_get_voltage(trf->regulator); if (uvolts > 4000000) trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3; @@ -2132,9 +2104,10 @@ static int trf7970a_probe(struct spi_device *spi) } trf->ddev = nfc_digital_allocate_device(&trf7970a_nfc_ops, - TRF7970A_SUPPORTED_PROTOCOLS, - NFC_DIGITAL_DRV_CAPS_IN_CRC | - NFC_DIGITAL_DRV_CAPS_TG_CRC, 0, 0); + TRF7970A_SUPPORTED_PROTOCOLS, + NFC_DIGITAL_DRV_CAPS_IN_CRC | + NFC_DIGITAL_DRV_CAPS_TG_CRC, 0, + 0); if (!trf->ddev) { dev_err(trf->dev, "Can't allocate NFC digital device\n"); ret = -ENOMEM; @@ -2157,7 +2130,7 @@ static int trf7970a_probe(struct spi_device *spi) ret = nfc_digital_register_device(trf->ddev); if (ret) { dev_err(trf->dev, "Can't register NFC digital device: %d\n", - ret); + ret); goto err_shutdown; } @@ -2266,29 +2239,31 @@ static int trf7970a_pm_runtime_resume(struct device *dev) static const struct dev_pm_ops trf7970a_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(trf7970a_suspend, trf7970a_resume) SET_RUNTIME_PM_OPS(trf7970a_pm_runtime_suspend, - trf7970a_pm_runtime_resume, NULL) + trf7970a_pm_runtime_resume, NULL) }; static const struct of_device_id trf7970a_of_match[] = { - { .compatible = "ti,trf7970a", }, - { /* sentinel */ }, + {.compatible = "ti,trf7970a",}, + {}, }; + MODULE_DEVICE_TABLE(of, trf7970a_of_match); static const struct spi_device_id trf7970a_id_table[] = { - { "trf7970a", 0 }, - { } + {"trf7970a", 0}, + {} }; + MODULE_DEVICE_TABLE(spi, trf7970a_id_table); static struct spi_driver trf7970a_spi_driver = { .probe = trf7970a_probe, .remove = trf7970a_remove, .id_table = trf7970a_id_table, - .driver = { - .name = "trf7970a", - .of_match_table = of_match_ptr(trf7970a_of_match), - .pm = &trf7970a_pm_ops, + .driver = { + .name = "trf7970a", + .of_match_table = of_match_ptr(trf7970a_of_match), + .pm = &trf7970a_pm_ops, }, }; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 951042a375d6..40c7581caeb0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1805,7 +1805,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (pci_is_enabled(pdev)) { u32 csts = readl(dev->bar + NVME_REG_CSTS); - if (dev->ctrl.state == NVME_CTRL_LIVE) + if (dev->ctrl.state == NVME_CTRL_LIVE || + dev->ctrl.state == NVME_CTRL_RESETTING) nvme_start_freeze(&dev->ctrl); dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || pdev->error_state != pci_channel_io_normal); diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c index e9847ce3860d..570ae3b7adf6 100644 --- a/drivers/s390/net/ctcm_fsms.c +++ b/drivers/s390/net/ctcm_fsms.c @@ -217,7 +217,7 @@ void ctcm_purge_skb_queue(struct sk_buff_head *q) CTCM_DBF_TEXT(TRACE, CTC_DBF_DEBUG, __func__); while ((skb = skb_dequeue(q))) { - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_any(skb); } } @@ -271,7 +271,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg) priv->stats.tx_bytes += 2; first = 0; } - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); } spin_lock(&ch->collect_lock); @@ -297,7 +297,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg) skb_put(ch->trans_skb, skb->len), skb->len); priv->stats.tx_packets++; priv->stats.tx_bytes += skb->len - LL_HEADER_LENGTH; - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); i++; } @@ -1248,7 +1248,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg) priv->stats.tx_bytes += 2; first = 0; } - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); } spin_lock(&ch->collect_lock); @@ -1298,7 +1298,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg) data_space -= skb->len; priv->stats.tx_packets++; priv->stats.tx_bytes += skb->len; - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_any(skb); peekskb = skb_peek(&ch->collect_queue); if (peekskb->len > data_space) @@ -1795,7 +1795,7 @@ static void ctcmpc_chx_send_sweep(fsm_instance *fsm, int event, void *arg) fsm_event(grp->fsm, MPCG_EVENT_INOP, dev); goto done; } else { - atomic_inc(&skb->users); + refcount_inc(&skb->users); skb_queue_tail(&wch->io_queue, skb); } diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 99121352c57b..e8782a8619f7 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -483,7 +483,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb) spin_unlock_irqrestore(&ch->collect_lock, saveflags); return -EBUSY; } else { - atomic_inc(&skb->users); + refcount_inc(&skb->users); header.length = l; header.type = be16_to_cpu(skb->protocol); header.unused = 0; @@ -500,7 +500,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb) * Protect skb against beeing free'd by upper * layers. */ - atomic_inc(&skb->users); + refcount_inc(&skb->users); ch->prof.txlen += skb->len; header.length = skb->len + LL_HEADER_LENGTH; header.type = be16_to_cpu(skb->protocol); @@ -517,14 +517,14 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb) if (hi) { nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); if (!nskb) { - atomic_dec(&skb->users); + refcount_dec(&skb->users); skb_pull(skb, LL_HEADER_LENGTH + 2); ctcm_clear_busy(ch->netdev); return -ENOMEM; } else { skb_put_data(nskb, skb->data, skb->len); - atomic_inc(&nskb->users); - atomic_dec(&skb->users); + refcount_inc(&nskb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); skb = nskb; } @@ -542,7 +542,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb) * Remove our header. It gets added * again on retransmit. */ - atomic_dec(&skb->users); + refcount_dec(&skb->users); skb_pull(skb, LL_HEADER_LENGTH + 2); ctcm_clear_busy(ch->netdev); return -ENOMEM; @@ -553,7 +553,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb) ch->ccw[1].count = skb->len; skb_copy_from_linear_data(skb, skb_put(ch->trans_skb, skb->len), skb->len); - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); ccw_idx = 0; } else { @@ -679,7 +679,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb) if ((fsm_getstate(ch->fsm) != CTC_STATE_TXIDLE) || grp->in_sweep) { spin_lock_irqsave(&ch->collect_lock, saveflags); - atomic_inc(&skb->users); + refcount_inc(&skb->users); p_header = kmalloc(PDU_HEADER_LENGTH, gfp_type()); if (!p_header) { @@ -716,7 +716,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb) * Protect skb against beeing free'd by upper * layers. */ - atomic_inc(&skb->users); + refcount_inc(&skb->users); /* * IDAL support in CTCM is broken, so we have to @@ -729,8 +729,8 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb) goto nomem_exit; } else { skb_put_data(nskb, skb->data, skb->len); - atomic_inc(&nskb->users); - atomic_dec(&skb->users); + refcount_inc(&nskb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); skb = nskb; } @@ -810,7 +810,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb) ch->trans_skb->len = 0; ch->ccw[1].count = skb->len; skb_put_data(ch->trans_skb, skb->data, skb->len); - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_irq(skb); ccw_idx = 0; CTCM_PR_DBGDATA("%s(%s): trans_skb len: %04x\n" @@ -855,7 +855,7 @@ nomem_exit: "%s(%s): MEMORY allocation ERROR\n", CTCM_FUNTAIL, ch->id); rc = -ENOMEM; - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_any(skb); fsm_event(priv->mpcg->fsm, MPCG_EVENT_INOP, dev); done: diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 7db427c0a6a4..1579695f4e64 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -743,7 +743,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg) conn->prof.tx_pending--; if (single_flag) { if ((skb = skb_dequeue(&conn->commit_queue))) { - atomic_dec(&skb->users); + refcount_dec(&skb->users); if (privptr) { privptr->stats.tx_packets++; privptr->stats.tx_bytes += @@ -766,7 +766,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg) txbytes += skb->len; txpackets++; stat_maxcq++; - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_any(skb); } if (conn->collect_len > conn->prof.maxmulti) @@ -958,7 +958,7 @@ static void netiucv_purge_skb_queue(struct sk_buff_head *q) struct sk_buff *skb; while ((skb = skb_dequeue(q))) { - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_any(skb); } } @@ -1176,7 +1176,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn, IUCV_DBF_TEXT(data, 2, "EBUSY from netiucv_transmit_skb\n"); } else { - atomic_inc(&skb->users); + refcount_inc(&skb->users); skb_queue_tail(&conn->collect_queue, skb); conn->collect_len += l; rc = 0; @@ -1245,7 +1245,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn, } else { if (copied) dev_kfree_skb(skb); - atomic_inc(&nskb->users); + refcount_inc(&nskb->users); skb_queue_tail(&conn->commit_queue, nskb); } } diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 3b657d5b7e49..aec06e10b969 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1242,7 +1242,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf) iucv->sk_txnotify(skb, TX_NOTIFY_GENERALERROR); } } - atomic_dec(&skb->users); + refcount_dec(&skb->users); dev_kfree_skb_any(skb); skb = skb_dequeue(&buf->skb_list); } @@ -3975,7 +3975,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue, int flush_cnt = 0, hdr_len, large_send = 0; buffer = buf->buffer; - atomic_inc(&skb->users); + refcount_inc(&skb->users); skb_queue_tail(&buf->skb_list, skb); /*check first on TSO ....*/ diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 2ee92aa90fe9..e937490d5d97 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, QEDI_ERR(&qedi->dbg_ctx, "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n", protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task); - WARN_ON(1); } } diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index f46880315ba8..5f5a4ef2e529 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1499,11 +1499,9 @@ err_idx: void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx) { - if (!test_and_clear_bit(idx, qedi->task_idx_map)) { + if (!test_and_clear_bit(idx, qedi->task_idx_map)) QEDI_ERR(&qedi->dbg_ctx, "FW task context, already cleared, tid=0x%x\n", idx); - WARN_ON(1); - } } void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt, diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 0d8f81591bed..3fdca2cdd8da 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, */ if (dump_payload) goto after_immediate_data; + /* + * Check for underflow case where both EDTL and immediate data payload + * exceeds what is presented by CDB's TRANSFER LENGTH, and what has + * already been set in target_cmd_size_check() as se_cmd->data_length. + * + * For this special case, fail the command and dump the immediate data + * payload. + */ + if (cmd->first_burst_len > cmd->se_cmd.data_length) { + cmd->sense_reason = TCM_INVALID_CDB_FIELD; + goto after_immediate_data; + } immed_ret = iscsit_handle_immediate_data(cmd, hdr, cmd->first_burst_len); @@ -4423,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession( * always sleep waiting for RX/TX thread shutdown to complete * within iscsit_close_connection(). */ - if (!conn->conn_transport->rdma_shutdown) + if (!conn->conn_transport->rdma_shutdown) { sleep = cmpxchg(&conn->tx_thread_active, true, false); + if (!sleep) + return; + } atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); @@ -4440,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid( { int sleep = 1; - if (!conn->conn_transport->rdma_shutdown) + if (!conn->conn_transport->rdma_shutdown) { sleep = cmpxchg(&conn->tx_thread_active, true, false); + if (!sleep) + return; + } atomic_set(&conn->conn_logout_remove, 0); complete(&conn->conn_logout_comp); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 9ab7090f7c83..0912de7c0cf8 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -136,7 +136,7 @@ int init_se_kmem_caches(void); void release_se_kmem_caches(void); u32 scsi_get_new_index(scsi_index_t); void transport_subsystem_check_init(void); -void transport_cmd_finish_abort(struct se_cmd *, int); +int transport_cmd_finish_abort(struct se_cmd *, int); unsigned char *transport_dump_cmd_direction(struct se_cmd *); void transport_dump_dev_state(struct se_device *, char *, int *); void transport_dump_dev_info(struct se_device *, struct se_lun *, diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index dce1e1b47316..13f47bf4d16b 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) kfree(tmr); } -static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) +static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { unsigned long flags; bool remove = true, send_tas; @@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) transport_send_task_abort(cmd); } - transport_cmd_finish_abort(cmd, remove); + return transport_cmd_finish_abort(cmd, remove); } static int target_check_cdb_and_preempt(struct list_head *list, @@ -184,8 +184,8 @@ void core_tmr_abort_task( cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - transport_cmd_finish_abort(se_cmd, true); - target_put_sess_cmd(se_cmd); + if (!transport_cmd_finish_abort(se_cmd, true)) + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); @@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - transport_cmd_finish_abort(cmd, 1); - target_put_sess_cmd(cmd); + if (!transport_cmd_finish_abort(cmd, 1)) + target_put_sess_cmd(cmd); } } @@ -380,8 +380,8 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(cmd, tas); - target_put_sess_cmd(cmd); + if (!core_tmr_handle_tas_abort(cmd, tas)) + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 6025935036c9..f1b3a46bdcaf 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) percpu_ref_put(&lun->lun_ref); } -void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) +int transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + int ret = 0; if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); @@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) cmd->se_tfo->aborted_task(cmd); if (transport_cmd_check_stop_to_fabric(cmd)) - return; + return 1; if (remove && ack_kref) - transport_put_cmd(cmd); + ret = transport_put_cmd(cmd); + + return ret; } static void target_complete_failure_work(struct work_struct *work) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 734cbf8d9676..dd9f1bebb5a3 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp, int status; token = (autofs_wqt_t) param->fail.token; - status = param->fail.status ? param->fail.status : -ENOENT; + status = param->fail.status < 0 ? param->fail.status : -ENOENT; return autofs4_wait_release(sbi, token, status); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 519599dddd36..0a7404ef9335 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -263,7 +263,10 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, kfree(vecs); if (unlikely(bio.bi_error)) - return bio.bi_error; + ret = bio.bi_error; + + bio_uninit(&bio); + return ret; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0fd081bd2a2f..fcef70602b27 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (to->type & ITER_IOVEC) + if (to->type == ITER_IOVEC) ctx->should_dirty = true; rc = setup_aio_ctx_iter(ctx, to, READ); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b08531977daa..3b147dc6af63 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) if (!pages) { pages = vmalloc(max_pages * sizeof(struct page *)); - if (!bv) { + if (!pages) { kvfree(bv); return -ENOMEM; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 27bc360c7ffd..a723df3e0197 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid, __u16 search_flags, struct cifs_search_info *srch_inf) { - return CIFSFindFirst(xid, tcon, path, cifs_sb, - &fid->netfid, search_flags, srch_inf, true); + int rc; + + rc = CIFSFindFirst(xid, tcon, path, cifs_sb, + &fid->netfid, search_flags, srch_inf, true); + if (rc) + cifs_dbg(FYI, "find first failed=%d\n", rc); + return rc; } static int diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c58691834eb2..7e48561abd29 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); kfree(utf16_path); if (rc) { - cifs_dbg(VFS, "open dir failed\n"); + cifs_dbg(FYI, "open dir failed rc=%d\n", rc); return rc; } @@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); if (rc) { - cifs_dbg(VFS, "query directory failed\n"); + cifs_dbg(FYI, "query directory failed rc=%d\n", rc); SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } return rc; @@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) sg = init_sg(rqst, sign); if (!sg) { - cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc); + cifs_dbg(VFS, "%s: Failed to init sg", __func__); + rc = -ENOMEM; goto free_req; } @@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) iv = kzalloc(iv_len, GFP_KERNEL); if (!iv) { cifs_dbg(VFS, "%s: Failed to alloc IV", __func__); + rc = -ENOMEM; goto free_sg; } iv[0] = 3; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 3cb5c9e2d4e7..de50e749ff05 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode, pcreatetime = (__u64 *)value; *pcreatetime = CIFS_I(inode)->createtime; return sizeof(__u64); - - return rc; } @@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (ret < 0) goto out; } + start_index = indices[pvec.nr - 1] + 1; } out: put_dax(dax_dev); diff --git a/fs/exec.c b/fs/exec.c index 72934df68471..904199086490 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; + unsigned long ptr_size; struct rlimit *rlim; + /* + * Since the stack will hold pointers to the strings, we + * must account for them as well. + * + * The size calculation is the entire vma while each arg page is + * built, so each time we get here it's calculating how far it + * is currently (rather than each call being just the newly + * added size from the arg page). As a result, we need to + * always add the entire size of the pointers, so that on the + * last call to get_arg_page() we'll actually have the entire + * correct size. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (ptr_size > ULONG_MAX - size) + goto fail; + size += ptr_size; + acct_arg_size(bprm, size / PAGE_SIZE); /* @@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { - put_page(page); - return NULL; - } + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + goto fail; } return page; + +fail: + put_page(page); + return NULL; } static void put_arg_page(struct page *page) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c14758e08d73..390ac9c39c59 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session, * A single slot, so highest used slotid is either 0 or -1 */ nfs4_free_slot(tbl, slot); - nfs4_slot_tbl_drain_complete(tbl); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32ccd7754f8a..2ac00bf4ecf1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL_GPL(nfs_link); -static void -nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data) -{ - struct dentry *old_dentry = data->old_dentry; - struct dentry *new_dentry = data->new_dentry; - struct inode *old_inode = d_inode(old_dentry); - struct inode *new_inode = d_inode(new_dentry); - - nfs_mark_for_revalidate(old_inode); - - switch (task->tk_status) { - case 0: - if (new_inode != NULL) - nfs_drop_nlink(new_inode); - d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, - nfs_save_change_attribute(data->new_dir)); - break; - case -ENOENT: - nfs_dentry_handle_enoent(old_dentry); - } -} - /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL; + struct dentry *dentry = NULL, *rehash = NULL; struct rpc_task *task; int error = -EBUSY; @@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the * rename, we unhash the dentry in advance. */ - if (!d_unhashed(new_dentry)) + if (!d_unhashed(new_dentry)) { d_drop(new_dentry); + rehash = new_dentry; + } if (d_count(new_dentry) > 2) { int err; @@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_dentry = dentry; + rehash = NULL; new_inode = NULL; } } @@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) NFS_PROTO(new_inode)->return_delegation(new_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, - nfs_complete_rename); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; @@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error == 0) error = task->tk_status; rpc_put_task(task); + nfs_mark_for_revalidate(old_inode); out: + if (rehash) + d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); + if (!error) { + if (new_inode != NULL) + nfs_drop_nlink(new_inode); + /* + * The d_move() should be here instead of in an async RPC completion + * handler because we need the proper locks to move the dentry. If + * we're interrupted by a signal, the async RPC completion handler + * should mark the directories for revalidation. + */ + d_move(old_dentry, new_dentry); + nfs_set_verifier(new_dentry, + nfs_save_change_attribute(new_dir)); + } else if (error == -ENOENT) + nfs_dentry_handle_enoent(old_dentry); + /* new dentry created? */ if (dentry) dput(dentry); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c08c46a3b8cd..dbfa18900e25 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, /* Except MODE, it seems harmless of setting twice. */ if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && - attrset[1] & FATTR4_WORD1_MODE) + (attrset[1] & FATTR4_WORD1_MODE || + attrset[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) @@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata) size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_sequence_free_slot(&lgp->res.seq_res); nfs4_free_pages(lgp->args.layout.pages, max_pages); pnfs_put_layout_hdr(NFS_I(inode)->layout); put_nfs_open_context(lgp->args.ctx); @@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); - nfs4_sequence_free_slot(&lgp->res.seq_res); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b34de036501b..cbf82b0d4467 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2134,6 +2134,8 @@ again: put_rpccred(cred); switch (status) { case 0: + case -EINTR: + case -ERESTARTSYS: break; case -ETIMEDOUT: if (clnt->cl_softrtry) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3b7c937a36b5..4689940a953c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode, struct ocfs2_lock_res *lockres; lockres = &OCFS2_I(inode)->ip_inode_lockres; + /* had_lock means that the currect process already takes the cluster + * lock previously. If had_lock is 1, we have nothing to do here, and + * it will get unlocked where we got the lock. + */ if (!had_lock) { ocfs2_remove_holder(lockres, oh); ocfs2_inode_unlock(inode, ex); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3c5384d9b3a5..f70c3778d600 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode, void *buffer, size_t buffer_size) { - int ret; + int ret, had_lock; struct buffer_head *di_bh = NULL; + struct ocfs2_lock_holder oh; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - mlog_errno(ret); - return ret; + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh); + if (had_lock < 0) { + mlog_errno(had_lock); + return had_lock; } down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); up_read(&OCFS2_I(inode)->ip_xattr_sem); - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); @@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode, { struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - int ret, credits, ref_meta = 0, ref_credits = 0; + int ret, credits, had_lock, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; + struct ocfs2_lock_holder oh; struct ocfs2_xattr_info xi = { .xi_name_index = name_index, @@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode, return -ENOMEM; } - ret = ocfs2_inode_lock(inode, &di_bh, 1); - if (ret < 0) { + had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh); + if (had_lock < 0) { + ret = had_lock; mlog_errno(ret); goto cleanup_nolock; } @@ -3670,7 +3673,7 @@ cleanup: if (ret) mlog_errno(ret); } - ocfs2_inode_unlock(inode, 1); + ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); cleanup_nolock: brelse(di_bh); brelse(xbs.xattr_bh); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 09af0f7cd55e..3b91faacc1ba 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1316,9 +1316,12 @@ xfs_vm_bmap( * The swap code (ab-)uses ->bmap to get a block mapping and then * bypasseÑ• the file system for actual I/O. We really can't allow * that on reflinks inodes, so we have to skip out here. And yes, - * 0 is the magic code for a bmap error.. + * 0 is the magic code for a bmap error. + * + * Since we don't pass back blockdev info, we can't return bmap + * information for rt files either. */ - if (xfs_is_reflink_inode(ip)) + if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip)) return 0; filemap_write_and_wait(mapping); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 197f3fffc9a7..408c7820e200 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -210,7 +210,8 @@ struct acpi_device_flags { u32 of_compatible_ok:1; u32 coherent_dma:1; u32 cca_seen:1; - u32 reserved:20; + u32 spi_i2c_slave:1; + u32 reserved:19; }; /* File System */ diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index c1da539f5e28..4d97a89da066 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize) static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size) { - return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) < + return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) < sk_atm(vcc)->sk_sndbuf; } diff --git a/include/linux/bio.h b/include/linux/bio.h index d1b04b0e99cf..a7e29fa0981f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -426,6 +426,7 @@ extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs); +extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b74a3edcb3da..1ddd36bd2173 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -391,6 +391,8 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + atomic_t shared_hctx_restart; + struct blk_queue_stats *stats; struct rq_wb *rq_wb; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c970a25d2a49..360c082e885c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -7,6 +7,7 @@ struct sock; struct cgroup; struct sk_buff; +struct bpf_sock_ops_kern; #ifdef CONFIG_CGROUP_BPF @@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, + struct bpf_sock_ops_kern *sock_ops, + enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, __ret; \ }) +#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && (sock_ops)->sk) { \ + typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ + sock_ops, \ + BPF_CGROUP_SOCK_OPS); \ + } \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index deca4e7f2845..b69e7a5869ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,7 @@ struct bpf_map_ops { int fd); void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); + u32 (*map_fd_sys_lookup_elem)(void *ptr); }; struct bpf_map { @@ -155,9 +156,14 @@ struct bpf_prog; struct bpf_insn_access_aux { enum bpf_reg_type reg_type; int ctx_field_size; - int converted_op_size; }; +static inline void +bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) +{ + aux->ctx_field_size = size; +} + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -172,7 +178,7 @@ struct bpf_verifier_ops { u32 (*convert_ctx_access)(enum bpf_access_type type, const struct bpf_insn *src, struct bpf_insn *dst, - struct bpf_prog *prog); + struct bpf_prog *prog, u32 *target_size); int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); }; @@ -288,9 +294,11 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); +int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); void bpf_fd_array_map_clear(struct bpf_map *map); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); +int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 03bf223f18be..3d137c33d664 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) diff --git a/include/linux/filter.h b/include/linux/filter.h index 1fa26dc562ce..f1fc9baa3509 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -337,6 +337,22 @@ struct bpf_prog_aux; bpf_size; \ }) +#define bpf_size_to_bytes(bpf_size) \ +({ \ + int bytes = -EINVAL; \ + \ + if (bpf_size == BPF_B) \ + bytes = sizeof(u8); \ + else if (bpf_size == BPF_H) \ + bytes = sizeof(u16); \ + else if (bpf_size == BPF_W) \ + bytes = sizeof(u32); \ + else if (bpf_size == BPF_DW) \ + bytes = sizeof(u64); \ + \ + bytes; \ +}) + #define BPF_SIZEOF(type) \ ({ \ const int __size = bytes_to_bpf_size(sizeof(type)); \ @@ -351,6 +367,13 @@ struct bpf_prog_aux; __size; \ }) +#define BPF_LDST_BYTES(insn) \ + ({ \ + const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \ + WARN_ON(__size < 0); \ + __size; \ + }) + #define __BPF_MAP_0(m, v, ...) v #define __BPF_MAP_1(m, v, t, a, ...) m(t, a) #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) @@ -401,6 +424,18 @@ struct bpf_prog_aux; #define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) #define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define bpf_ctx_range(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ + offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 + +#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ + ({ \ + BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ + *(PTR_SIZE) = (SIZE); \ + offsetof(TYPE, MEMBER); \ + }) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { @@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) return prog->type == BPF_PROG_TYPE_UNSPEC; } +static inline bool +bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default) +{ + bool off_ok; +#ifdef __LITTLE_ENDIAN + off_ok = (off & (size_default - 1)) == 0; +#else + off_ok = (off & (size_default - 1)) + size == size_default; +#endif + return off_ok && size <= size_default && (size & (size - 1)) == 0; +} + #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) #ifdef CONFIG_ARCH_HAS_SET_MEMORY @@ -898,4 +945,13 @@ static inline int bpf_tell_extensions(void) return SKF_AD_MAX; } +struct bpf_sock_ops_kern { + struct sock *sk; + u32 op; + union { + u32 reply; + u32 replylong[4]; + }; +}; + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 12f6fba6d21a..97caf1821de8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -18,6 +18,7 @@ #include <linux/skbuff.h> #include <linux/timer.h> #include <linux/in.h> +#include <linux/refcount.h> #include <uapi/linux/igmp.h> static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) @@ -84,7 +85,7 @@ struct ip_mc_list { struct ip_mc_list __rcu *next_hash; struct timer_list timer; int users; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; char tm_running; char reporter; diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index e7c04c4e4bcd..fb3f809e34e4 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -11,6 +11,7 @@ #include <linux/timer.h> #include <linux/sysctl.h> #include <linux/rtnetlink.h> +#include <linux/refcount.h> struct ipv4_devconf { void *sysctl; @@ -22,7 +23,7 @@ struct ipv4_devconf { struct in_device { struct net_device *dev; - atomic_t refcnt; + refcount_t refcnt; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ @@ -219,7 +220,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev) rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) - atomic_inc(&in_dev->refcnt); + refcount_inc(&in_dev->refcnt); rcu_read_unlock(); return in_dev; } @@ -240,12 +241,12 @@ void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) { - if (atomic_dec_and_test(&idev->refcnt)) + if (refcount_dec_and_test(&idev->refcnt)) in_dev_finish_destroy(idev); } -#define __in_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in_dev_hold(idev) atomic_inc(&(idev)->refcnt) +#define __in_dev_put(idev) refcount_dec(&(idev)->refcnt) +#define in_dev_hold(idev) refcount_inc(&(idev)->refcnt) #endif /* __KERNEL__ */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 556e1c31b5d0..f31a0b5377e1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1103,6 +1103,9 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_FPGA(mdev, cap) \ MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) +#define MLX5_CAP64_FPGA(mdev, cap) \ + MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 750701b3b863..df6ce59a1f95 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -44,6 +44,7 @@ #include <linux/workqueue.h> #include <linux/mempool.h> #include <linux/interrupt.h> +#include <linux/idr.h> #include <linux/mlx5/device.h> #include <linux/mlx5/doorbell.h> @@ -110,6 +111,7 @@ enum { MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, + MLX5_REG_FPGA_ACCESS_REG = 0x4024, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, @@ -737,6 +739,14 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; }; +#define MLX5_MAX_RESERVED_GIDS 8 + +struct mlx5_rsvd_gids { + unsigned int start; + unsigned int count; + struct ida ida; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -766,6 +776,10 @@ struct mlx5_core_dev { atomic_t num_qps; u32 issi; struct mlx5e_resources mlx5e_res; + struct { + struct mlx5_rsvd_gids reserved_gids; + atomic_t roce_en; + } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif @@ -932,6 +946,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); +void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); @@ -1045,6 +1060,11 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id); + static inline int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d6b99d5d0f24..87869c04849a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -232,6 +232,11 @@ enum { MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, + MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, + MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, + MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, + MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964, MLX5_CMD_OP_MAX }; @@ -600,7 +605,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_at_20[0x20]; + u8 swp[0x1]; + u8 swp_csum[0x1]; + u8 swp_lso[0x1]; + u8 reserved_at_23[0x1d]; u8 reserved_at_40[0x10]; u8 lro_min_mss_size[0x10]; @@ -2433,7 +2441,8 @@ struct mlx5_ifc_sqc_bits { u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; - u8 reserved_at_d[0x13]; + u8 allow_swp[0x1]; + u8 reserved_at_e[0x12]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -8304,6 +8313,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_sltp_reg_bits sltp_reg; struct mlx5_ifc_mtpps_reg_bits mtpps_reg; struct mlx5_ifc_mtppse_reg_bits mtppse_reg; + struct mlx5_ifc_fpga_access_reg_bits fpga_access_reg; struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits; struct mlx5_ifc_fpga_cap_bits fpga_cap_bits; struct mlx5_ifc_mcqi_reg_bits mcqi_reg; diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 0032d10ac6cf..255a88d08078 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -32,6 +32,14 @@ #ifndef MLX5_IFC_FPGA_H #define MLX5_IFC_FPGA_H +enum { + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9, +}; + +enum { + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, +}; + struct mlx5_ifc_fpga_shell_caps_bits { u8 max_num_qps[0x10]; u8 reserved_at_10[0x8]; @@ -108,6 +116,15 @@ struct mlx5_ifc_fpga_cap_bits { u8 reserved_at_500[0x300]; }; +enum { + MLX5_FPGA_CTRL_OPERATION_LOAD = 0x1, + MLX5_FPGA_CTRL_OPERATION_RESET = 0x2, + MLX5_FPGA_CTRL_OPERATION_FLASH_SELECT = 0x3, + MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON = 0x4, + MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF = 0x5, + MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX = 0x6, +}; + struct mlx5_ifc_fpga_ctrl_bits { u8 reserved_at_0[0x8]; u8 operation[0x8]; @@ -141,4 +158,275 @@ struct mlx5_ifc_fpga_error_event_bits { u8 reserved_at_60[0x80]; }; +#define MLX5_FPGA_ACCESS_REG_SIZE_MAX 64 + +struct mlx5_ifc_fpga_access_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x10]; + u8 size[0x10]; + + u8 address[0x40]; + + u8 data[0][0x8]; +}; + +enum mlx5_ifc_fpga_qp_state { + MLX5_FPGA_QPC_STATE_INIT = 0x0, + MLX5_FPGA_QPC_STATE_ACTIVE = 0x1, + MLX5_FPGA_QPC_STATE_ERROR = 0x2, +}; + +enum mlx5_ifc_fpga_qp_type { + MLX5_FPGA_QPC_QP_TYPE_SHELL_QP = 0x0, + MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP = 0x1, +}; + +enum mlx5_ifc_fpga_qp_service_type { + MLX5_FPGA_QPC_ST_RC = 0x0, +}; + +struct mlx5_ifc_fpga_qpc_bits { + u8 state[0x4]; + u8 reserved_at_4[0x1b]; + u8 qp_type[0x1]; + + u8 reserved_at_20[0x4]; + u8 st[0x4]; + u8 reserved_at_28[0x10]; + u8 traffic_class[0x8]; + + u8 ether_type[0x10]; + u8 prio[0x3]; + u8 dei[0x1]; + u8 vid[0xc]; + + u8 reserved_at_60[0x20]; + + u8 reserved_at_80[0x8]; + u8 next_rcv_psn[0x18]; + + u8 reserved_at_a0[0x8]; + u8 next_send_psn[0x18]; + + u8 reserved_at_c0[0x10]; + u8 pkey[0x10]; + + u8 reserved_at_e0[0x8]; + u8 remote_qpn[0x18]; + + u8 reserved_at_100[0x15]; + u8 rnr_retry[0x3]; + u8 reserved_at_118[0x5]; + u8 retry_count[0x3]; + + u8 reserved_at_120[0x20]; + + u8 reserved_at_140[0x10]; + u8 remote_mac_47_32[0x10]; + + u8 remote_mac_31_0[0x20]; + + u8 remote_ip[16][0x8]; + + u8 reserved_at_200[0x40]; + + u8 reserved_at_240[0x10]; + u8 fpga_mac_47_32[0x10]; + + u8 fpga_mac_31_0[0x20]; + + u8 fpga_ip[16][0x8]; +}; + +struct mlx5_ifc_fpga_create_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_create_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_modify_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 field_select[0x20]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_modify_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_fpga_query_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_fpga_query_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_query_qp_counters_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 clear[0x1]; + u8 reserved_at_41[0x7]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_fpga_query_qp_counters_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 rx_ack_packets[0x40]; + + u8 rx_send_packets[0x40]; + + u8 tx_ack_packets[0x40]; + + u8 tx_send_packets[0x40]; + + u8 rx_total_drop[0x40]; + + u8 reserved_at_1c0[0x1c0]; +}; + +struct mlx5_ifc_fpga_destroy_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_fpga_destroy_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ipsec_extended_cap_bits { + u8 encapsulation[0x20]; + + u8 reserved_0[0x15]; + u8 ipv4_fragment[0x1]; + u8 ipv6[0x1]; + u8 esn[0x1]; + u8 lso[0x1]; + u8 transport_and_tunnel_mode[0x1]; + u8 tunnel_mode[0x1]; + u8 transport_mode[0x1]; + u8 ah_esp[0x1]; + u8 esp[0x1]; + u8 ah[0x1]; + u8 ipv4_options[0x1]; + + u8 auth_alg[0x20]; + + u8 enc_alg[0x20]; + + u8 sa_cap[0x20]; + + u8 reserved_1[0x10]; + u8 number_of_ipsec_counters[0x10]; + + u8 ipsec_counters_addr_low[0x20]; + u8 ipsec_counters_addr_high[0x20]; +}; + +struct mlx5_ifc_ipsec_counters_bits { + u8 dec_in_packets[0x40]; + + u8 dec_out_packets[0x40]; + + u8 dec_bypass_packets[0x40]; + + u8 enc_in_packets[0x40]; + + u8 enc_out_packets[0x40]; + + u8 enc_bypass_packets[0x40]; + + u8 drop_dec_packets[0x40]; + + u8 failed_auth_dec_packets[0x40]; + + u8 drop_enc_packets[0x40]; + + u8 success_add_sa[0x40]; + + u8 fail_add_sa[0x40]; + + u8 success_delete_sa[0x40]; + + u8 fail_delete_sa[0x40]; + + u8 dropped_cmd[0x40]; +}; + #endif /* MLX5_IFC_FPGA_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 1f637f4d1265..6f41270d80c0 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -225,10 +225,20 @@ enum { MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; +enum { + MLX5_ETH_WQE_SWP_INNER_L3_IPV6 = 1 << 0, + MLX5_ETH_WQE_SWP_INNER_L4_UDP = 1 << 1, + MLX5_ETH_WQE_SWP_OUTER_L3_IPV6 = 1 << 4, + MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, +}; + struct mlx5_wqe_eth_seg { - u8 rsvd0[4]; + u8 swp_outer_l4_offset; + u8 swp_outer_l3_offset; + u8 swp_inner_l4_offset; + u8 swp_inner_l3_offset; u8 cs_flags; - u8 rsvd1; + u8 swp_flags; __be16 mss; __be32 rsvd2; union { diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 996711d8a7b4..41d04e9d088a 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -1,7 +1,6 @@ #ifndef _NFNETLINK_H #define _NFNETLINK_H - #include <linux/netlink.h> #include <linux/capability.h> #include <net/netlink.h> @@ -10,13 +9,16 @@ struct nfnl_callback { int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); + const struct nlattr * const cda[], + struct netlink_ext_ack *extack); int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); + const struct nlattr * const cda[], + struct netlink_ext_ack *extack); int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); + const struct nlattr * const cda[], + struct netlink_ext_ack *extack); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index e0cbf17af780..2c2a5514b0df 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -122,8 +122,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) /* Clear the bit in the hook mask that tells if the rule is on a base chain */ #define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS)) -/* True if the target is not a standard target */ -#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) static inline bool ebt_invalid_target(int target) { diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 1828900c9411..27c0aaa22cb0 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -11,6 +11,7 @@ #include <linux/interrupt.h> #include <linux/rcupdate.h> #include <linux/list.h> +#include <linux/refcount.h> union inet_addr { __u32 all[4]; @@ -34,7 +35,7 @@ struct netpoll { }; struct netpoll_info { - atomic_t refcnt; + refcount_t refcnt; struct semaphore dev_lock; diff --git a/include/linux/phy.h b/include/linux/phy.h index 1d8d70193782..2a9567bb8186 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -372,6 +372,7 @@ struct phy_c45_device_ids { * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. + * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * link_timeout: The number of timer firings to wait before the @@ -409,6 +410,7 @@ struct phy_device { bool has_fixups; bool suspended; bool sysfs_links; + bool loopback_enabled; enum phy_state state; @@ -648,6 +650,7 @@ struct phy_driver { int (*set_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, const void *data); + int (*set_loopback)(struct phy_device *dev, bool enable); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -793,6 +796,7 @@ void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); +int phy_loopback(struct phy_device *phydev, bool enable); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); @@ -847,6 +851,7 @@ int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); +int genphy_loopback(struct phy_device *phydev, bool enable); int genphy_soft_reset(struct phy_device *phydev); static inline int genphy_no_soft_reset(struct phy_device *phydev) { diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index a6f9d633f5be..9e75ac8d19be 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -23,7 +23,7 @@ struct nfcmrvl_platform_data { */ /* GPIO that is wired to RESET_N signal */ - unsigned int reset_n_io; + int reset_n_io; /* Tell if transport is muxed in HCI one */ unsigned int hci_muxed; diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h deleted file mode 100644 index f6494b347c06..000000000000 --- a/include/linux/platform_data/st-nci.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Driver include for ST NCI NFC chip family. - * - * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef _ST_NCI_H_ -#define _ST_NCI_H_ - -#define ST_NCI_DRIVER_NAME "st_nci" - -struct st_nci_nfc_platform_data { - unsigned int gpio_reset; - unsigned int irq_polarity; - bool is_ese_present; - bool is_uicc_present; -}; - -#endif /* _ST_NCI_H_ */ diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index a567cbf8c5b4..39e2a2ac2471 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -38,6 +38,8 @@ #include <linux/slab.h> /* dma_addr_t manip */ +#define PTR_LO(x) ((u32)(((uintptr_t)(x)) & 0xffffffff)) +#define PTR_HI(x) ((u32)((((uintptr_t)(x)) >> 16) >> 16)) #define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) #define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) #define DMA_REGPAIR_LE(x, val) do { \ @@ -778,7 +780,7 @@ enum protocol_type { PROTOCOLID_ROCE, PROTOCOLID_CORE, PROTOCOLID_ETH, - PROTOCOLID_RESERVED4, + PROTOCOLID_IWARP, PROTOCOLID_RESERVED5, PROTOCOLID_PREROCE, PROTOCOLID_COMMON, diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h new file mode 100644 index 000000000000..b8b3e1cfae90 --- /dev/null +++ b/include/linux/qed/iwarp_common.h @@ -0,0 +1,53 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __IWARP_COMMON__ +#define __IWARP_COMMON__ +#include <linux/qed/rdma_common.h> +/************************/ +/* IWARP FW CONSTANTS */ +/************************/ + +#define IWARP_ACTIVE_MODE 0 +#define IWARP_PASSIVE_MODE 1 + +#define IWARP_SHARED_QUEUE_PAGE_SIZE (0x8000) +#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET (0x4000) +#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE (0x1000) +#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET (0x5000) +#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE (0x3000) + +#define IWARP_REQ_MAX_INLINE_DATA_SIZE (128) +#define IWARP_REQ_MAX_SINGLE_SQ_WQE_SIZE (176) + +#define IWARP_MAX_QPS (64 * 1024) + +#endif /* __IWARP_COMMON__ */ diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 5958b45eb699..dd7a3b86bb9e 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -47,9 +47,10 @@ enum qed_ll2_conn_type { QED_LL2_TYPE_FCOE, QED_LL2_TYPE_ISCSI, QED_LL2_TYPE_TEST, - QED_LL2_TYPE_ISCSI_OOO, + QED_LL2_TYPE_OOO, QED_LL2_TYPE_RESERVED2, QED_LL2_TYPE_ROCE, + QED_LL2_TYPE_IWARP, QED_LL2_TYPE_RESERVED3, MAX_QED_LL2_RX_CONN_TYPE }; diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index ff9be01b5f53..4dd72ba210f5 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -470,6 +470,101 @@ struct qed_rdma_counters_out_params { #define QED_ROCE_TX_HEAD_FAILURE (1) #define QED_ROCE_TX_FRAG_FAILURE (2) +enum qed_iwarp_event_type { + QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */ + QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */ + QED_IWARP_EVENT_ACTIVE_COMPLETE, /* Active side reply received */ + QED_IWARP_EVENT_DISCONNECT, + QED_IWARP_EVENT_CLOSE, + QED_IWARP_EVENT_IRQ_FULL, + QED_IWARP_EVENT_RQ_EMPTY, + QED_IWARP_EVENT_LLP_TIMEOUT, + QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR, + QED_IWARP_EVENT_CQ_OVERFLOW, + QED_IWARP_EVENT_QP_CATASTROPHIC, + QED_IWARP_EVENT_ACTIVE_MPA_REPLY, + QED_IWARP_EVENT_LOCAL_ACCESS_ERROR, + QED_IWARP_EVENT_REMOTE_OPERATION_ERROR, + QED_IWARP_EVENT_TERMINATE_RECEIVED +}; + +enum qed_tcp_ip_version { + QED_TCP_IPV4, + QED_TCP_IPV6, +}; + +struct qed_iwarp_cm_info { + enum qed_tcp_ip_version ip_version; + u32 remote_ip[4]; + u32 local_ip[4]; + u16 remote_port; + u16 local_port; + u16 vlan; + u8 ord; + u8 ird; + u16 private_data_len; + const void *private_data; +}; + +struct qed_iwarp_cm_event_params { + enum qed_iwarp_event_type event; + const struct qed_iwarp_cm_info *cm_info; + void *ep_context; /* To be passed to accept call */ + int status; +}; + +typedef int (*iwarp_event_handler) (void *context, + struct qed_iwarp_cm_event_params *event); + +struct qed_iwarp_connect_in { + iwarp_event_handler event_cb; + void *cb_context; + struct qed_rdma_qp *qp; + struct qed_iwarp_cm_info cm_info; + u16 mss; + u8 remote_mac_addr[ETH_ALEN]; + u8 local_mac_addr[ETH_ALEN]; +}; + +struct qed_iwarp_connect_out { + void *ep_context; +}; + +struct qed_iwarp_listen_in { + iwarp_event_handler event_cb; + void *cb_context; /* passed to event_cb */ + u32 max_backlog; + enum qed_tcp_ip_version ip_version; + u32 ip_addr[4]; + u16 port; + u16 vlan; +}; + +struct qed_iwarp_listen_out { + void *handle; +}; + +struct qed_iwarp_accept_in { + void *ep_context; + void *cb_context; + struct qed_rdma_qp *qp; + const void *private_data; + u16 private_data_len; + u8 ord; + u8 ird; +}; + +struct qed_iwarp_reject_in { + void *ep_context; + void *cb_context; + const void *private_data; + u16 private_data_len; +}; + +struct qed_iwarp_send_rtr_in { + void *ep_context; +}; + struct qed_roce_ll2_header { void *vaddr; dma_addr_t baddr; @@ -491,6 +586,7 @@ struct qed_roce_ll2_packet { enum qed_rdma_type { QED_RDMA_TYPE_ROCE, + QED_RDMA_TYPE_IWARP }; struct qed_dev_rdma_info { @@ -575,6 +671,24 @@ struct qed_rdma_ops { int (*ll2_set_mac_filter)(struct qed_dev *cdev, u8 *old_mac_address, u8 *new_mac_address); + int (*iwarp_connect)(void *rdma_cxt, + struct qed_iwarp_connect_in *iparams, + struct qed_iwarp_connect_out *oparams); + + int (*iwarp_create_listen)(void *rdma_cxt, + struct qed_iwarp_listen_in *iparams, + struct qed_iwarp_listen_out *oparams); + + int (*iwarp_accept)(void *rdma_cxt, + struct qed_iwarp_accept_in *iparams); + + int (*iwarp_reject)(void *rdma_cxt, + struct qed_iwarp_reject_in *iparams); + + int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle); + + int (*iwarp_send_rtr)(void *rdma_cxt, + struct qed_iwarp_send_rtr_in *iparams); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 7a4804c4a593..99e866487e2f 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -57,12 +57,12 @@ #include <uapi/linux/sctp.h> /* Section 3.1. SCTP Common Header Format */ -typedef struct sctphdr { +struct sctphdr { __be16 source; __be16 dest; __be32 vtag; __le32 checksum; -} sctp_sctphdr_t; +}; static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { @@ -70,11 +70,11 @@ static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) } /* Section 3.2. Chunk Field Descriptions. */ -typedef struct sctp_chunkhdr { +struct sctp_chunkhdr { __u8 type; __u8 flags; __be16 length; -} sctp_chunkhdr_t; +}; /* Section 3.2. Chunk Type Values. @@ -82,7 +82,7 @@ typedef struct sctp_chunkhdr { * Value field. It takes a value from 0 to 254. The value of 255 is * reserved for future use as an extension field. */ -typedef enum { +enum sctp_cid { SCTP_CID_DATA = 0, SCTP_CID_INIT = 1, SCTP_CID_INIT_ACK = 2, @@ -109,7 +109,7 @@ typedef enum { SCTP_CID_ASCONF = 0xC1, SCTP_CID_ASCONF_ACK = 0x80, SCTP_CID_RECONF = 0x82, -} sctp_cid_t; /* enum */ +}; /* enum */ /* Section 3.2 @@ -117,12 +117,12 @@ typedef enum { * the action that must be taken if the processing endpoint does not * recognize the Chunk Type. */ -typedef enum { +enum { SCTP_CID_ACTION_DISCARD = 0x00, SCTP_CID_ACTION_DISCARD_ERR = 0x40, SCTP_CID_ACTION_SKIP = 0x80, SCTP_CID_ACTION_SKIP_ERR = 0xc0, -} sctp_cid_action_t; +}; enum { SCTP_CID_ACTION_MASK = 0xc0, }; @@ -162,12 +162,12 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 }; * Section 3.2.1 Optional/Variable-length Parmaeter Format. */ -typedef struct sctp_paramhdr { +struct sctp_paramhdr { __be16 type; __be16 length; -} sctp_paramhdr_t; +}; -typedef enum { +enum sctp_param { /* RFC 2960 Section 3.3.5 */ SCTP_PARAM_HEARTBEAT_INFO = cpu_to_be16(1), @@ -207,7 +207,7 @@ typedef enum { SCTP_PARAM_RESET_RESPONSE = cpu_to_be16(0x0010), SCTP_PARAM_RESET_ADD_OUT_STREAMS = cpu_to_be16(0x0011), SCTP_PARAM_RESET_ADD_IN_STREAMS = cpu_to_be16(0x0012), -} sctp_param_t; /* enum */ +}; /* enum */ /* RFC 2960 Section 3.2.1 @@ -216,29 +216,29 @@ typedef enum { * not recognize the Parameter Type. * */ -typedef enum { +enum { SCTP_PARAM_ACTION_DISCARD = cpu_to_be16(0x0000), SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000), SCTP_PARAM_ACTION_SKIP = cpu_to_be16(0x8000), SCTP_PARAM_ACTION_SKIP_ERR = cpu_to_be16(0xc000), -} sctp_param_action_t; +}; enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), }; /* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */ -typedef struct sctp_datahdr { +struct sctp_datahdr { __be32 tsn; __be16 stream; __be16 ssn; __be32 ppid; __u8 payload[0]; -} sctp_datahdr_t; +}; -typedef struct sctp_data_chunk { - sctp_chunkhdr_t chunk_hdr; - sctp_datahdr_t data_hdr; -} sctp_data_chunk_t; +struct sctp_data_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_datahdr data_hdr; +}; /* DATA Chuck Specific Flags */ enum { @@ -257,54 +257,54 @@ enum { SCTP_DATA_FRAG_MASK = 0x03, }; * This chunk is used to initiate a SCTP association between two * endpoints. */ -typedef struct sctp_inithdr { +struct sctp_inithdr { __be32 init_tag; __be32 a_rwnd; __be16 num_outbound_streams; __be16 num_inbound_streams; __be32 initial_tsn; __u8 params[0]; -} sctp_inithdr_t; +}; -typedef struct sctp_init_chunk { - sctp_chunkhdr_t chunk_hdr; - sctp_inithdr_t init_hdr; -} sctp_init_chunk_t; +struct sctp_init_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_inithdr init_hdr; +}; /* Section 3.3.2.1. IPv4 Address Parameter (5) */ typedef struct sctp_ipv4addr_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; struct in_addr addr; } sctp_ipv4addr_param_t; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ typedef struct sctp_ipv6addr_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; struct in6_addr addr; } sctp_ipv6addr_param_t; /* Section 3.3.2.1 Cookie Preservative (9) */ typedef struct sctp_cookie_preserve_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be32 lifespan_increment; } sctp_cookie_preserve_param_t; /* Section 3.3.2.1 Host Name Address (11) */ typedef struct sctp_hostname_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; uint8_t hostname[0]; } sctp_hostname_param_t; /* Section 3.3.2.1 Supported Address Types (12) */ typedef struct sctp_supported_addrs_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be16 types[0]; } sctp_supported_addrs_param_t; /* Appendix A. ECN Capable (32768) */ typedef struct sctp_ecn_capable_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; } sctp_ecn_capable_param_t; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ @@ -321,19 +321,19 @@ typedef struct sctp_supported_ext_param { /* AUTH Section 3.1 Random */ typedef struct sctp_random_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u8 random_val[0]; } sctp_random_param_t; /* AUTH Section 3.2 Chunk List */ typedef struct sctp_chunks_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u8 chunks[0]; } sctp_chunks_param_t; /* AUTH Section 3.3 HMAC Algorithm */ typedef struct sctp_hmac_algo_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be16 hmac_ids[0]; } sctp_hmac_algo_param_t; @@ -341,18 +341,18 @@ typedef struct sctp_hmac_algo_param { * The INIT ACK chunk is used to acknowledge the initiation of an SCTP * association. */ -typedef sctp_init_chunk_t sctp_initack_chunk_t; +typedef struct sctp_init_chunk sctp_initack_chunk_t; /* Section 3.3.3.1 State Cookie (7) */ typedef struct sctp_cookie_param { - sctp_paramhdr_t p; + struct sctp_paramhdr p; __u8 body[0]; } sctp_cookie_param_t; /* Section 3.3.3.1 Unrecognized Parameters (8) */ typedef struct sctp_unrecognized_param { - sctp_paramhdr_t param_hdr; - sctp_paramhdr_t unrecognized; + struct sctp_paramhdr param_hdr; + struct sctp_paramhdr unrecognized; } sctp_unrecognized_param_t; @@ -386,7 +386,7 @@ typedef struct sctp_sackhdr { } sctp_sackhdr_t; typedef struct sctp_sack_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_sackhdr_t sack_hdr; } sctp_sack_chunk_t; @@ -399,11 +399,11 @@ typedef struct sctp_sack_chunk { */ typedef struct sctp_heartbeathdr { - sctp_paramhdr_t info; + struct sctp_paramhdr info; } sctp_heartbeathdr_t; typedef struct sctp_heartbeat_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_heartbeathdr_t hb_hdr; } sctp_heartbeat_chunk_t; @@ -413,7 +413,7 @@ typedef struct sctp_heartbeat_chunk { * chunk descriptor. */ typedef struct sctp_abort_chunk { - sctp_chunkhdr_t uh; + struct sctp_chunkhdr uh; } sctp_abort_chunk_t; @@ -425,8 +425,8 @@ typedef struct sctp_shutdownhdr { } sctp_shutdownhdr_t; struct sctp_shutdown_chunk_t { - sctp_chunkhdr_t chunk_hdr; - sctp_shutdownhdr_t shutdown_hdr; + struct sctp_chunkhdr chunk_hdr; + sctp_shutdownhdr_t shutdown_hdr; }; /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ @@ -438,8 +438,8 @@ typedef struct sctp_errhdr { } sctp_errhdr_t; typedef struct sctp_operr_chunk { - sctp_chunkhdr_t chunk_hdr; - sctp_errhdr_t err_hdr; + struct sctp_chunkhdr chunk_hdr; + sctp_errhdr_t err_hdr; } sctp_operr_chunk_t; /* RFC 2960 3.3.10 - Operation Error @@ -528,7 +528,7 @@ typedef struct sctp_ecnehdr { } sctp_ecnehdr_t; typedef struct sctp_ecne_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_ecnehdr_t ence_hdr; } sctp_ecne_chunk_t; @@ -540,7 +540,7 @@ typedef struct sctp_cwrhdr { } sctp_cwrhdr_t; typedef struct sctp_cwr_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_cwrhdr_t cwr_hdr; } sctp_cwr_chunk_t; @@ -639,7 +639,7 @@ struct sctp_fwdtsn_chunk { * report status of ASCONF processing. */ typedef struct sctp_addip_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be32 crr_id; } sctp_addip_param_t; @@ -649,7 +649,7 @@ typedef struct sctp_addiphdr { } sctp_addiphdr_t; typedef struct sctp_addip_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_addiphdr_t addip_hdr; } sctp_addip_chunk_t; @@ -709,7 +709,7 @@ typedef struct sctp_authhdr { } sctp_authhdr_t; typedef struct sctp_auth_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_authhdr_t auth_hdr; } sctp_auth_chunk_t; @@ -719,12 +719,12 @@ struct sctp_infox { }; struct sctp_reconf_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; __u8 params[0]; }; struct sctp_strreset_outreq { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; __u32 response_seq; __u32 send_reset_at_tsn; @@ -732,18 +732,18 @@ struct sctp_strreset_outreq { }; struct sctp_strreset_inreq { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; __u16 list_of_streams[0]; }; struct sctp_strreset_tsnreq { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; }; struct sctp_strreset_addstrm { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; __u16 number_of_streams; __u16 reserved; @@ -760,13 +760,13 @@ enum { }; struct sctp_strreset_resp { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 response_seq; __u32 result; }; struct sctp_strreset_resptsn { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 response_seq; __u32 result; __u32 senders_next_tsn; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a17e235639ae..3d3ceaac13b1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -252,7 +252,7 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { - atomic_t use; + refcount_t use; enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, @@ -761,7 +761,7 @@ struct sk_buff { unsigned char *head, *data; unsigned int truesize; - atomic_t users; + refcount_t users; }; #ifdef __KERNEL__ @@ -872,9 +872,9 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(atomic_read(&skb->users) == 1)) + if (likely(refcount_read(&skb->users) == 1)) smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + else if (likely(!refcount_dec_and_test(&skb->users))) return false; return true; @@ -915,7 +915,7 @@ struct sk_buff_fclones { struct sk_buff skb2; - atomic_t fclone_ref; + refcount_t fclone_ref; }; /** @@ -935,7 +935,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - atomic_read(&fclones->fclone_ref) > 1 && + refcount_read(&fclones->fclone_ref) > 1 && fclones->skb2.sk == sk; } @@ -1283,7 +1283,7 @@ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, */ static inline struct sk_buff *skb_get(struct sk_buff *skb) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); return skb; } @@ -1384,7 +1384,7 @@ static inline void __skb_header_release(struct sk_buff *skb) */ static inline int skb_shared(const struct sk_buff *skb) { - return atomic_read(&skb->users) != 1; + return refcount_read(&skb->users) != 1; } /** @@ -2206,6 +2206,11 @@ static inline int skb_mac_offset(const struct sk_buff *skb) return skb_mac_header(skb) - skb->data; } +static inline u32 skb_mac_header_len(const struct sk_buff *skb) +{ + return skb->network_header - skb->mac_header; +} + static inline int skb_mac_header_was_set(const struct sk_buff *skb) { return skb->mac_header != (typeof(skb->mac_header))~0U; @@ -3589,13 +3594,13 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { - if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) + if (nf_bridge && refcount_dec_and_test(&nf_bridge->use)) kfree(nf_bridge); } static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) { if (nf_bridge) - atomic_inc(&nf_bridge->use); + refcount_inc(&nf_bridge->use); } #endif /* CONFIG_BRIDGE_NETFILTER */ static inline void nf_reset(struct sk_buff *skb) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 07ef550c6627..93315d6b21a8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -84,6 +84,7 @@ struct kmem_cache { int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ + struct work_struct kobj_remove_work; #endif #ifdef CONFIG_MEMCG struct memcg_cache_params memcg_params; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 110f4532188c..f7043ccca81c 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -29,7 +29,6 @@ */ struct tk_read_base { struct clocksource *clock; - u64 (*read)(struct clocksource *cs); u64 mask; u64 cycle_last; u32 mult; @@ -58,7 +57,7 @@ struct tk_read_base { * interval. * @xtime_remainder: Shifted nano seconds left over when rounding * @cycle_interval - * @raw_interval: Raw nano seconds accumulated per NTP interval. + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. * @ntp_error: Difference between accumulated time and NTP time in ntp * shifted nano seconds. * @ntp_error_shift: Shift conversion between clock shifted nano seconds and @@ -100,7 +99,7 @@ struct timekeeper { u64 cycle_interval; u64 xtime_interval; s64 xtime_remainder; - u32 raw_interval; + u64 raw_interval; /* The ntp_tick_length() value currently being used. * This cached copy ensures we consistently apply the tick * length for an entire tick, as ntp_tick_length may change diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 00d232406f18..021f7a88f52c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -117,6 +117,9 @@ struct cdc_ncm_ctx { u32 tx_curr_frame_num; u32 rx_max; u32 tx_max; + u32 tx_curr_size; + u32 tx_low_mem_max_cnt; + u32 tx_low_mem_val; u32 max_datagram_size; u16 tx_max_datagrams; u16 tx_remainder; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd60eccb59a6..3a385e4767f0 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -4,6 +4,7 @@ #include <linux/socket.h> #include <linux/un.h> #include <linux/mutex.h> +#include <linux/refcount.h> #include <net/sock.h> void unix_inflight(struct user_struct *user, struct file *fp); @@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { - atomic_t refcnt; + refcount_t refcnt; int len; unsigned int hash; struct sockaddr_un name[0]; diff --git a/include/net/arp.h b/include/net/arp.h index 65619a2de6f4..17d90e4e8dc5 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -28,7 +28,7 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 76c7300626d6..c487bfa2f479 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -5,6 +5,7 @@ #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> +#include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> @@ -29,7 +30,7 @@ struct fib_rule { struct fib_rule __rcu *ctarget; struct net *fr_net; - atomic_t refcnt; + refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; @@ -103,12 +104,12 @@ struct fib_rules_ops { static inline void fib_rule_get(struct fib_rule *rule) { - atomic_inc(&rule->refcnt); + refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { - if (atomic_dec_and_test(&rule->refcnt)) + if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 975779d0e7b0..440c1e9d0623 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -50,7 +50,7 @@ struct inet_frag_queue { spinlock_t lock; struct timer_list timer; struct hlist_node list; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff *fragments; struct sk_buff *fragments_tail; ktime_t stamp; @@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { - if (atomic_dec_and_test(&q->refcnt)) + if (refcount_dec_and_test(&q->refcnt)) inet_frag_destroy(q, f); } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1178931288cb..5026b1f08bb8 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -32,7 +32,7 @@ #include <net/tcp_states.h> #include <net/netns/hash.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/byteorder.h> /* This is for all connections with a full identity, no wildcards. @@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, dport, dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -359,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, refcounted); } -u32 sk_ehashfn(const struct sock *sk); u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport); diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 235c7811a86a..f2a215fc78e4 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,7 +46,7 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following field + * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ @@ -60,7 +60,7 @@ struct inet_peer { /* following fields might be frequently dirtied */ __u32 dtime; /* the time of last use of not referenced entries */ - atomic_t refcnt; + refcount_t refcnt; }; struct inet_peer_base { diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1036c902d2c9..31b1bb11ba3f 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -384,7 +384,7 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 639b67564a7d..afc39e3a3f7c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -17,6 +17,7 @@ */ #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> @@ -76,7 +77,7 @@ struct neigh_parms { void *sysctl_table; int dead; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; @@ -137,7 +138,7 @@ struct neighbour { unsigned long confirmed; unsigned long updated; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; struct timer_list timer; @@ -395,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { - atomic_dec(&parms->refcnt); + refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { - atomic_inc(&parms->refcnt); + refcount_inc(&parms->refcnt); return parms; } @@ -410,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) static inline void neigh_release(struct neighbour *neigh) { - if (atomic_dec_and_test(&neigh->refcnt)) + if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) - atomic_inc(&neigh->refcnt); + refcount_inc(&neigh->refcnt); return neigh; } -#define neigh_hold(n) atomic_inc(&(n)->refcnt) +#define neigh_hold(n) refcount_inc(&(n)->refcnt) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fe80bb48ab1f..31a2b51bef2c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -5,6 +5,7 @@ #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> @@ -46,7 +47,7 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - atomic_t passive; /* To decided when the network + refcount_t passive; /* To decided when the network * namespace should be freed. */ atomic_t count; /* To decided when the network @@ -158,6 +159,7 @@ extern struct net init_net; struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); +void net_ns_barrier(void); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@ -168,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags, return ERR_PTR(-EINVAL); return old_net; } + +static inline void net_ns_barrier(void) {} #endif /* CONFIG_NET_NS */ diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 0b0c35c37125..925524ede6c8 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); + refcount_set(&(skb->nf_bridge->use), 1); return skb->nf_bridge; } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8ece3612d0cd..48407569585d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -225,9 +225,13 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report); + +/* also set unconfirmed conntracks as dying. Only use in module exit path. */ +void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), + void *data); struct nf_conntrack_zone; diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e01559b4d781..6d14b36e3a49 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -71,7 +71,7 @@ struct nf_conntrack_l3proto { struct module *me; }; -extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ @@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * __nf_ct_l3proto_find(u_int16_t l3proto) { - if (unlikely(l3proto >= AF_MAX)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO)) return &nf_conntrack_l3proto_generic; return rcu_dereference(nf_ct_l3protos[l3proto]); } diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8a8bab8d7b15..bd5be0d691d5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -281,6 +281,23 @@ struct nft_set_estimate { enum nft_set_class space; }; +/** + * struct nft_set_type - nf_tables set type + * + * @select_ops: function to select nft_set_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @owner: module reference + */ +struct nft_set_type { + const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, + const struct nft_set_desc *desc, + u32 flags); + const struct nft_set_ops *ops; + struct list_head list; + struct module *owner; +}; + struct nft_set_ext; struct nft_expr; @@ -297,8 +314,6 @@ struct nft_expr; * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance - * @list: nf_tables_set_ops list node - * @owner: module reference * @elemsize: element private size * @features: features supported by the implementation */ @@ -336,7 +351,8 @@ struct nft_set_ops { struct nft_set *set, struct nft_set_iter *iter); - unsigned int (*privsize)(const struct nlattr * const nla[]); + unsigned int (*privsize)(const struct nlattr * const nla[], + const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); @@ -345,14 +361,13 @@ struct nft_set_ops { const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); - struct list_head list; - struct module *owner; unsigned int elemsize; u32 features; + const struct nft_set_type *type; }; -int nft_register_set(struct nft_set_ops *ops); -void nft_unregister_set(struct nft_set_ops *ops); +int nft_register_set(struct nft_set_type *type); +void nft_unregister_set(struct nft_set_type *type); /** * struct nft_set - nf_tables set instance diff --git a/include/net/netlabel.h b/include/net/netlabel.h index efe98068880f..72d6435fc16c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -37,7 +37,7 @@ #include <linux/in6.h> #include <net/netlink.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> struct cipso_v4_doi; struct calipso_doi; @@ -136,7 +136,7 @@ struct netlbl_audit { * */ struct netlbl_lsm_cache { - atomic_t refcount; + refcount_t refcount; void (*free) (const void *data); void *data; }; @@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) cache = kzalloc(sizeof(*cache), flags); if (cache) - atomic_set(&cache->refcount, 1); + refcount_set(&cache->refcount, 1); return cache; } @@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) */ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) { - if (!atomic_dec_and_test(&cache->refcount)) + if (!refcount_dec_and_test(&cache->refcount)) return; if (cache->free) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 53ced67c4ae9..23e22054aa60 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/bug.h> +#include <linux/refcount.h> #include <net/sock.h> @@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return NULL; req->rsk_listener = NULL; if (attach_listener) { - if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { kmem_cache_free(ops->slab, req); return NULL; } @@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; - atomic_set(&req->rsk_refcnt, 0); + refcount_set(&req->rsk_refcnt, 0); return req; } @@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, static inline void reqsk_free(struct request_sock *req) { /* temporary debugging */ - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); req->rsk_ops->destructor(req); if (req->rsk_listener) @@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req) static inline void reqsk_put(struct request_sock *req) { - if (atomic_dec_and_test(&req->rsk_refcnt)) + if (refcount_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index 9b9fb122b31f..171244bd856f 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -97,8 +97,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs); int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, __be16 hmac_id); -int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc); -int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc); +int sctp_auth_send_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); +int sctp_auth_recv_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); void sctp_auth_calculate_hmac(const struct sctp_association *asoc, struct sk_buff *skb, struct sctp_auth_chunk *auth, gfp_t gfp); diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4a20d00461c..d4679e7a5ed5 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -132,7 +132,7 @@ typedef union { struct sctp_association *asoc; struct sctp_transport *transport; struct sctp_bind_addr *bp; - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; sctp_sackhdr_t *sackh; @@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) -SCTP_ARG_CONSTRUCTOR(PEER_INIT, sctp_init_chunk_t *, init) +SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index b07a745ab69f..9b18044c551e 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -130,7 +130,7 @@ typedef enum { */ typedef union { - sctp_cid_t chunk; + enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; sctp_event_primitive_t primitive; @@ -141,7 +141,7 @@ static inline sctp_subtype_t \ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } -SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, sctp_cid_t, chunk) +SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) @@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) /* Calculate the actual data size in a data chunk */ #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - (unsigned long)(c->chunk_hdr)\ - - sizeof(sctp_data_chunk_t))) + - sizeof(struct sctp_data_chunk))) /* Internal error codes */ typedef enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 069582ee5d7f..a9519a06a23b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ - ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ @@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ - sizeof(sctp_chunkhdr_t));\ + sizeof(struct sctp_chunkhdr));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 47113f2c4b0a..860f378333b5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer); void sctp_generate_reconf_event(unsigned long peer); void sctp_generate_proto_unreach_event(unsigned long peer); -void sctp_ootb_pkt_free(struct sctp_packet *); +void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, - const struct sctp_association *, - struct sctp_chunk *, +struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); -int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, - struct sockaddr_storage*, int); /* 3rd level prototypes */ -__u32 sctp_generate_tag(const struct sctp_endpoint *); -__u32 sctp_generate_tsn(const struct sctp_endpoint *); +__u32 sctp_generate_tag(const struct sctp_endpoint *ep); +__u32 sctp_generate_tsn(const struct sctp_endpoint *ep); /* Extern declarations for major data structures. */ extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES]; @@ -349,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(sctp_data_chunk_t); + size -= sizeof(struct sctp_data_chunk); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e26763bfabd6..07c11fefa8c4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -310,9 +310,10 @@ struct sctp_cookie { __u32 adaptation_ind; - __u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH]; + __u8 auth_random[sizeof(struct sctp_paramhdr) + + SCTP_AUTH_RANDOM_LENGTH]; __u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2]; - __u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS]; + __u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS]; /* This is a shim for my peer's INIT packet, followed by * a copy of the raw address list of the association. @@ -1297,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr, int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - sctp_cid_t, sctp_init_chunk_t *peer_init, + enum sctp_cid cid, struct sctp_init_chunk *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk, const union sctp_addr *peer, - sctp_init_chunk_t *init, gfp_t gfp); + struct sctp_init_chunk *init, gfp_t gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); diff --git a/include/net/sock.h b/include/net/sock.h index 00d09140e354..60200f4f4028 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,6 +66,7 @@ #include <linux/poll.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> @@ -219,7 +220,7 @@ struct sock_common { u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; - atomic_t skc_refcnt; + refcount_t skc_refcnt; /* private: */ int skc_dontcopy_end[0]; union { @@ -390,7 +391,7 @@ struct sock { /* ===== cache line for TX ===== */ int sk_wmem_queued; - atomic_t sk_wmem_alloc; + refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; @@ -611,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk) static __always_inline void sock_hold(struct sock *sk) { - atomic_inc(&sk->sk_refcnt); + refcount_inc(&sk->sk_refcnt); } /* Ungrab socket in the context, which assumes that socket refcnt @@ -619,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk) */ static __always_inline void __sock_put(struct sock *sk) { - atomic_dec(&sk->sk_refcnt); + refcount_dec(&sk->sk_refcnt); } static inline bool sk_del_node_init(struct sock *sk) @@ -628,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -650,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -1144,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) static inline void sk_refcnt_debug_release(const struct sock *sk) { - if (atomic_read(&sk->sk_refcnt) != 1) + if (refcount_read(&sk->sk_refcnt) != 1) printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", - sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); + sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt)); } #else /* SOCK_REFCNT_DEBUG */ #define sk_refcnt_debug_inc(sk) do { } while (0) @@ -1636,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk); /* Ungrab socket and destroy it, if it was the last reference. */ static inline void sock_put(struct sock *sk) { - if (atomic_dec_and_test(&sk->sk_refcnt)) + if (refcount_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } /* Generic version of sock_put(), dealing with all sockets @@ -1911,7 +1912,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - 1; } /** @@ -2055,7 +2056,7 @@ static inline unsigned long sock_wspace(struct sock *sk) int amt = 0; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc); if (amt < 0) amt = 0; } @@ -2136,7 +2137,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); } static inline gfp_t gfp_any(void) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index c784a6ac6ef1..8ae9e3b6392e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -217,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b); + +#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) #else static inline void switchdev_deferred_process(void) @@ -322,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a, return false; } +#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0) + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d0751b79d99c..70483296157f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,6 +46,10 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/bpf-cgroup.h> + extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; @@ -1000,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); +void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); @@ -2021,4 +2027,62 @@ int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); +/* Call BPF_SOCK_OPS program that returns an int. If the return value + * is < 0, then the BPF op failed (for example if the loaded BPF + * program does not support the chosen operation or there is no BPF + * program loaded). + */ +#ifdef CONFIG_BPF +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + struct bpf_sock_ops_kern sock_ops; + int ret; + + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + + memset(&sock_ops, 0, sizeof(sock_ops)); + sock_ops.sk = sk; + sock_ops.op = op; + + ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); + if (ret == 0) + ret = sock_ops.reply; + else + ret = -1; + return ret; +} +#else +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + return -EPERM; +} +#endif + +static inline u32 tcp_timeout_init(struct sock *sk) +{ + int timeout; + + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + + if (timeout <= 0) + timeout = TCP_TIMEOUT_INIT; + return timeout; +} + +static inline u32 tcp_rwnd_init_bpf(struct sock *sk) +{ + int rwnd; + + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + + if (rwnd < 0) + rwnd = 0; + return rwnd; +} + +static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) +{ + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); +} #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 1468dbd0f09a..972ce4baab6b 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -302,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); +/* UDP uses skb->dev_scratch to cache as much information as possible and avoid + * possibly multiple cache miss on dequeue() + */ +#if BITS_PER_LONG == 64 + +/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on + * cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been already + * validated and pulled. + */ +struct udp_dev_scratch { + u32 truesize; + u16 len; + bool is_linear; + bool csum_unnecessary; +}; + +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; +} + +#else +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return skb->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return skb_csum_unnecessary(skb); +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return !skb_is_nonlinear(skb); +} +#endif + +static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, + struct iov_iter *to) +{ + int n, copy = len - off; + + n = copy_to_iter(skb->data + off, copy, to); + if (n == copy) + return 0; + + return -EFAULT; +} + /* * SNMP statistics for UDP and UDP-Lite */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b816a0a6686e..326e8498b10e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -221,9 +221,17 @@ struct vxlan_config { bool no_share; }; +struct vxlan_dev_node { + struct hlist_node hlist; + struct vxlan_dev *vxlan; +}; + /* Pseudo network device */ struct vxlan_dev { - struct hlist_node hlist; /* vni hash table */ + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 01f5bc144ee5..01fa357e9a32 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1854,8 +1854,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) } #endif -#ifdef CONFIG_XFRM_OFFLOAD void __net_init xfrm_dev_init(void); + +#ifdef CONFIG_XFRM_OFFLOAD int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); @@ -1881,10 +1882,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) } } #else -static inline void __net_init xfrm_dev_init(void) -{ -} - static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) { return 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f94b48b168dc..e99e3e6f8b37 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -120,12 +120,14 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, }; enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -518,6 +520,25 @@ union bpf_attr { * Set full skb->hash. * @skb: pointer to skb * @hash: hash to set + * + * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) + * Calls setsockopt. Not all opts are available, only those with + * integer optvals plus TCP_CONGESTION. + * Supported levels: SOL_SOCKET and IPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: SOL_SOCKET or IPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in byes + * Return: 0 or negative error + * + * int bpf_skb_adjust_room(skb, len_diff, mode, flags) + * Grow or shrink room in sk_buff. + * @skb: pointer to skb + * @len_diff: (signed) amount of room to grow/shrink + * @mode: operation mode (enum bpf_adj_room_mode) + * @flags: reserved for future use + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -568,7 +589,9 @@ union bpf_attr { FN(probe_read_str), \ FN(get_socket_cookie), \ FN(get_socket_uid), \ - FN(set_hash), + FN(set_hash), \ + FN(setsockopt), \ + FN(skb_adjust_room), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -618,6 +641,11 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -720,4 +748,56 @@ struct bpf_map_info { __u32 map_flags; } __attribute__((aligned(8))); +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * Some of this fields are in network (bigendian) byte order and may need + * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 reply; + __u32 replylong[4]; + }; + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ +}; + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ +}; + +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ced9d8b97426..6217ff8500a1 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -121,6 +121,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_RESET_STREAMS 119 #define SCTP_RESET_ASSOC 120 #define SCTP_ADD_STREAMS 121 +#define SCTP_SOCKOPT_PEELOFF_FLAGS 122 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -978,6 +979,11 @@ typedef struct { int sd; } sctp_peeloff_arg_t; +typedef struct { + sctp_peeloff_arg_t p_arg; + unsigned flags; +} sctp_peeloff_flags_arg_t; + /* * Peer Address Thresholds socket option */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index ecb43542246e..d771a3872500 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -335,6 +335,26 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) } /* only called from syscall */ +int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) +{ + void **elem, *ptr; + int ret = 0; + + if (!map->ops->map_fd_sys_lookup_elem) + return -ENOTSUPP; + + rcu_read_lock(); + elem = array_map_lookup_elem(map, key); + if (elem && (ptr = READ_ONCE(*elem))) + *value = map->ops->map_fd_sys_lookup_elem(ptr); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +/* only called from syscall */ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { @@ -400,6 +420,11 @@ static void prog_fd_array_put_ptr(void *ptr) bpf_prog_put(ptr); } +static u32 prog_fd_array_sys_lookup_elem(void *ptr) +{ + return ((struct bpf_prog *)ptr)->aux->id; +} + /* decrement refcnt of all bpf_progs that are stored in this map */ void bpf_fd_array_map_clear(struct bpf_map *map) { @@ -418,6 +443,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, + .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -585,4 +611,5 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, + .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, }; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ea6033cba947..546113430049 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); + +/** + * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock + * @sk: socket to get cgroup from + * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains + * sk with connection information (IP addresses, etc.) May not contain + * cgroup info if it is a req sock. + * @type: The type of program to be exectuted + * + * socket passed is expected to be of type INET or INET6. + * + * The program type passed in via @type must be suitable for sock_ops + * filtering. No further check is performed to assert that. + * + * This function will return %-EPERM if any if an attached program was found + * and if it returned != 1 during execution. In all other cases, 0 is returned. + */ +int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, + struct bpf_sock_ops_kern *sock_ops, + enum bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog *prog; + int ret = 0; + + + rcu_read_lock(); + + prog = rcu_dereference(cgrp->bpf.effective[type]); + if (prog) + ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; + + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 774069ca18a7..ad5f55922a13 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1297,7 +1297,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { - fp->bpf_func = interpreters[round_down(fp->aux->stack_depth, 32) / 32]; + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + + fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 004334ea13ba..4fb463172aa8 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1244,6 +1244,26 @@ static void fd_htab_map_free(struct bpf_map *map) } /* only called from syscall */ +int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) +{ + void **ptr; + int ret = 0; + + if (!map->ops->map_fd_sys_lookup_elem) + return -ENOTSUPP; + + rcu_read_lock(); + ptr = htab_map_lookup_elem(map, key); + if (ptr) + *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +/* only called from syscall */ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { @@ -1305,4 +1325,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_delete_elem = htab_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, .map_fd_put_ptr = bpf_map_fd_put_ptr, + .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, }; diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 59bcdf821ae4..1da574612bea 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -95,3 +95,8 @@ void bpf_map_fd_put_ptr(void *ptr) */ bpf_map_put(ptr); } + +u32 bpf_map_fd_sys_lookup_elem(void *ptr) +{ + return ((struct bpf_map *)ptr)->id; +} diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h index 177fadb689dc..6183db9ec08c 100644 --- a/kernel/bpf/map_in_map.h +++ b/kernel/bpf/map_in_map.h @@ -19,5 +19,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, int ufd); void bpf_map_fd_put_ptr(void *ptr); +u32 bpf_map_fd_sys_lookup_elem(void *ptr); #endif diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8942c820d620..18980472f5b0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -24,6 +24,13 @@ #include <linux/kernel.h> #include <linux/idr.h> +#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ + (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) +#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) +#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) + DEFINE_PER_CPU(int, bpf_prog_active); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); @@ -209,10 +216,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) const struct bpf_map *map = filp->private_data; const struct bpf_array *array; u32 owner_prog_type = 0; + u32 owner_jited = 0; if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); owner_prog_type = array->owner_prog_type; + owner_jited = array->owner_jited; } seq_printf(m, @@ -229,9 +238,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->map_flags, map->pages * 1ULL << PAGE_SHIFT); - if (owner_prog_type) + if (owner_prog_type) { seq_printf(m, "owner_prog_type:\t%u\n", owner_prog_type); + seq_printf(m, "owner_jited:\t%u\n", + owner_jited); + } } #endif @@ -411,6 +423,8 @@ static int map_lookup_elem(union bpf_attr *attr) map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) value_size = round_up(map->value_size, 8) * num_possible_cpus(); + else if (IS_FD_MAP(map)) + value_size = sizeof(u32); else value_size = map->value_size; @@ -426,9 +440,10 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_percpu_array_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { err = bpf_stackmap_copy(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || - map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - err = -ENOTSUPP; + } else if (IS_FD_ARRAY(map)) { + err = bpf_fd_array_map_lookup_elem(map, key, value); + } else if (IS_FD_HASH(map)) { + err = bpf_fd_htab_map_lookup_elem(map, key, value); } else { rcu_read_lock(); ptr = map->ops->map_lookup_elem(map, key); @@ -1069,6 +1084,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_INET_SOCK_CREATE: ptype = BPF_PROG_TYPE_CGROUP_SOCK; break; + case BPF_CGROUP_SOCK_OPS: + ptype = BPF_PROG_TYPE_SOCK_OPS; + break; default: return -EINVAL; } @@ -1109,6 +1127,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_SOCK_OPS: cgrp = cgroup_get_from_fd(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); @@ -1123,6 +1142,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return ret; } + #endif /* CONFIG_CGROUP_BPF */ #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 74ea96ea391b..6a86723c5b64 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, return 0; } -static int bpf_size_to_bytes(int bpf_size) -{ - if (bpf_size == BPF_W) - return 4; - else if (bpf_size == BPF_H) - return 2; - else if (bpf_size == BPF_B) - return 1; - else if (bpf_size == BPF_DW) - return 8; - else - return -EINVAL; -} - static bool is_spillable_regtype(enum bpf_reg_type type) { switch (type) { @@ -761,7 +747,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { - struct bpf_insn_access_aux info = { .reg_type = *reg_type }; + struct bpf_insn_access_aux info = { + .reg_type = *reg_type, + }; /* for analyzer ctx accesses are already validated and converted */ if (env->analyzer_ops) @@ -769,25 +757,14 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, &info)) { - /* a non zero info.ctx_field_size indicates: - * . For this field, the prog type specific ctx conversion algorithm - * only supports whole field access. - * . This ctx access is a candiate for later verifier transformation - * to load the whole field and then apply a mask to get correct result. - * a non zero info.converted_op_size indicates perceived actual converted - * value width in convert_ctx_access. + /* A non zero info.ctx_field_size indicates that this field is a + * candidate for later verifier transformation to load the whole + * field and then apply a mask when accessed with a narrower + * access than actual ctx access size. A zero info.ctx_field_size + * will only allow for whole field access and rejects any other + * type of narrower access. */ - if ((info.ctx_field_size && !info.converted_op_size) || - (!info.ctx_field_size && info.converted_op_size)) { - verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n", - env->prog->type, off, size); - return -EACCES; - } - - if (info.ctx_field_size) { - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; - env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size; - } + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; *reg_type = info.reg_type; /* remember the offset of last byte accessed in ctx */ @@ -1016,6 +993,11 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins if (err) return err; + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d leaks addr into mem\n", insn->src_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); @@ -1675,6 +1657,65 @@ static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + u8 opcode = BPF_OP(insn->code); + s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm); + + /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */ + if (src_reg->imm > 0 && dst_reg->imm) { + switch (opcode) { + case BPF_ADD: + /* dreg += sreg + * where both have zero upper bits. Adding them + * can only result making one more bit non-zero + * in the larger value. + * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47) + * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47) + */ + dst_reg->imm = min(src_reg->imm, 63 - imm_log2); + dst_reg->imm--; + break; + case BPF_AND: + /* dreg &= sreg + * AND can not extend zero bits only shrink + * Ex. 0x00..00ffffff + * & 0x0f..ffffffff + * ---------------- + * 0x00..00ffffff + */ + dst_reg->imm = max(src_reg->imm, 63 - imm_log2); + break; + case BPF_OR: + /* dreg |= sreg + * OR can only extend zero bits + * Ex. 0x00..00ffffff + * | 0x0f..ffffffff + * ---------------- + * 0x0f..00ffffff + */ + dst_reg->imm = min(src_reg->imm, 63 - imm_log2); + break; + case BPF_SUB: + case BPF_MUL: + case BPF_RSH: + case BPF_LSH: + /* These may be flushed out later */ + default: + mark_reg_unknown_value(regs, insn->dst_reg); + } + } else { + mark_reg_unknown_value(regs, insn->dst_reg); + } + + dst_reg->type = UNKNOWN_VALUE; + return 0; +} + static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1684,6 +1725,9 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, u8 opcode = BPF_OP(insn->code); u64 dst_imm = dst_reg->imm; + if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE) + return evaluate_reg_imm_alu_unknown(env, insn); + /* dst_reg->type == CONST_IMM here. Simulate execution of insns * containing ALU ops. Don't care about overflow or negative * values, just add/sub/... them; registers are in u64. @@ -3396,11 +3440,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; + int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0; + bool is_narrower_load; + u32 target_size; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -3440,39 +3486,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; - off = insn->off; - size = bpf_size_to_bytes(BPF_SIZE(insn->code)); ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; - converted_op_size = env->insn_aux_data[i + delta].converted_op_size; - is_narrower_load = type == BPF_READ && size < ctx_field_size; + size = BPF_LDST_BYTES(insn); /* If the read access is a narrower load of the field, * convert to a 4/8-byte load, to minimum program type specific * convert_ctx_access changes. If conversion is successful, * we will apply proper mask to the result. */ + is_narrower_load = size < ctx_field_size; if (is_narrower_load) { - int size_code = BPF_H; + u32 off = insn->off; + u8 size_code; + if (type == BPF_WRITE) { + verbose("bpf verifier narrow ctx access misconfigured\n"); + return -EINVAL; + } + + size_code = BPF_H; if (ctx_field_size == 4) size_code = BPF_W; else if (ctx_field_size == 8) size_code = BPF_DW; + insn->off = off & ~(ctx_field_size - 1); insn->code = BPF_LDX | BPF_MEM | size_code; } - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + + target_size = 0; + cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, + &target_size); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || + (ctx_field_size && !target_size)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; } - if (is_narrower_load && size < converted_op_size) { + + if (is_narrower_load && size < target_size) { if (ctx_field_size <= 4) insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, - (1 << size * 8) - 1); + (1 << size * 8) - 1); else insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, - (1 << size * 8) - 1); + (1 << size * 8) - 1); } new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 2831480c63a2..ee97196bb151 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, int ret = -ENOMEM, max_order = 0; if (!has_aux(event)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) { /* diff --git a/kernel/signal.c b/kernel/signal.c index ca92bcfeb322..45b4c1ffe14e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig) return !tsk->ptrace; } -static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) +static void collect_signal(int sig, struct sigpending *list, siginfo_t *info, + bool *resched_timer) { struct sigqueue *q, *first = NULL; @@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) still_pending: list_del_init(&first->list); copy_siginfo(info, &first->info); + + *resched_timer = + (first->flags & SIGQUEUE_PREALLOC) && + (info->si_code == SI_TIMER) && + (info->si_sys_private); + __sigqueue_free(first); } else { /* @@ -548,12 +555,12 @@ still_pending: } static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, - siginfo_t *info) + siginfo_t *info, bool *resched_timer) { int sig = next_signal(pending, mask); if (sig) - collect_signal(sig, pending, info); + collect_signal(sig, pending, info, resched_timer); return sig; } @@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, */ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) { + bool resched_timer = false; int signr; /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - signr = __dequeue_signal(&tsk->pending, mask, info); + signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, - mask, info); + mask, info, &resched_timer); #ifdef CONFIG_POSIX_TIMERS /* * itimer signal ? @@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) current->jobctl |= JOBCTL_STOP_DEQUEUED; } #ifdef CONFIG_POSIX_TIMERS - if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { + if (resched_timer) { /* * Release the siglock to ensure proper locking order * of timer locks outside of siglocks. Note, we leave diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 9652bc57fd09..b602c48cb841 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) tk->offs_boot = ktime_add(tk->offs_boot, delta); } +/* + * tk_clock_read - atomic clocksource read() helper + * + * This helper is necessary to use in the read paths because, while the + * seqlock ensures we don't return a bad value while structures are updated, + * it doesn't protect from potential crashes. There is the possibility that + * the tkr's clocksource may change between the read reference, and the + * clock reference passed to the read function. This can cause crashes if + * the wrong clocksource is passed to the wrong read function. + * This isn't necessary to use when holding the timekeeper_lock or doing + * a read of the fast-timekeeper tkrs (which is protected by its own locking + * and update logic). + */ +static inline u64 tk_clock_read(struct tk_read_base *tkr) +{ + struct clocksource *clock = READ_ONCE(tkr->clock); + + return clock->read(clock); +} + #ifdef CONFIG_DEBUG_TIMEKEEPING #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ @@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) */ do { seq = read_seqcount_begin(&tk_core.seq); - now = tkr->read(tkr->clock); + now = tk_clock_read(tkr); last = tkr->cycle_last; mask = tkr->mask; max = tkr->clock->max_cycles; @@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) u64 cycle_now, delta; /* read clocksource */ - cycle_now = tkr->read(tkr->clock); + cycle_now = tk_clock_read(tkr); /* calculate the delta since the last update_wall_time */ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); @@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) ++tk->cs_was_changed_seq; old_clock = tk->tkr_mono.clock; tk->tkr_mono.clock = clock; - tk->tkr_mono.read = clock->read; tk->tkr_mono.mask = clock->mask; - tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono); tk->tkr_raw.clock = clock; - tk->tkr_raw.read = clock->read; tk->tkr_raw.mask = clock->mask; tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; @@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) /* Go back from cycles -> shifted ns */ tk->xtime_interval = interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; - tk->raw_interval = (interval * clock->mult) >> clock->shift; + tk->raw_interval = interval * clock->mult; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { @@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) now += timekeeping_delta_to_ns(tkr, clocksource_delta( - tkr->read(tkr->clock), + tk_clock_read(tkr), tkr->cycle_last, tkr->mask)); } while (read_seqcount_retry(&tkf->seq, seq)); @@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs) return cycles_at_suspend; } +static struct clocksource dummy_clock = { + .read = dummy_clock_read, +}; + /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. @@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk) struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); - cycles_at_suspend = tkr->read(tkr->clock); - tkr_dummy.read = dummy_clock_read; + cycles_at_suspend = tk_clock_read(tkr); + tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); - tkr_dummy.read = dummy_clock_read; + tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } @@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr_mono.clock; u64 cycle_now, delta; u64 nsec; - cycle_now = tk->tkr_mono.read(clock); + cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; @@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) do { seq = read_seqcount_begin(&tk_core.seq); - - now = tk->tkr_mono.read(tk->tkr_mono.clock); + now = tk_clock_read(&tk->tkr_mono); systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, @@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn) * Check whether the system counter value provided by the * device driver is on the current timekeeping interval. */ - now = tk->tkr_mono.read(tk->tkr_mono.clock); + now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; if (!cycle_between(interval_start, cycles, now)) { clock_was_set_seq = tk->clock_was_set_seq; @@ -1629,7 +1649,7 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->tkr_mono.read(clock); + cycle_now = tk_clock_read(&tk->tkr_mono); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && cycle_now > tk->tkr_mono.cycle_last) { u64 nsec, cyc_delta; @@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, u32 shift, unsigned int *clock_set) { u64 interval = tk->cycle_interval << shift; - u64 raw_nsecs; + u64 snsec_per_sec; /* If the offset is smaller than a shifted interval, do nothing */ if (offset < interval) @@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = (u64)tk->raw_interval << shift; - raw_nsecs += tk->raw_time.tv_nsec; - if (raw_nsecs >= NSEC_PER_SEC) { - u64 raw_secs = raw_nsecs; - raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - tk->raw_time.tv_sec += raw_secs; + tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift; + tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; + snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; + while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { + tk->tkr_raw.xtime_nsec -= snsec_per_sec; + tk->raw_time.tv_sec++; } - tk->raw_time.tv_nsec = raw_nsecs; + tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift; + tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift; /* Accumulate error between NTP and clock interval */ tk->ntp_error += tk->ntp_tick << shift; @@ -2030,7 +2051,7 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 97c46b440cd6..37385193a608 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) } /* - * limited trace_printk() - * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed + * Only limited trace_printk() conversion specifiers allowed: + * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s */ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) @@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, i++; } - if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') + if (fmt[i] != 'i' && fmt[i] != 'd' && + fmt[i] != 'u' && fmt[i] != 'x') return -EINVAL; fmt_cnt++; } @@ -583,7 +584,8 @@ const struct bpf_verifier_ops tracepoint_prog_ops = { static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { - int sample_period_off; + const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data, + sample_period); if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) return false; @@ -592,43 +594,35 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type if (off % size != 0) return false; - /* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */ - sample_period_off = offsetof(struct bpf_perf_event_data, sample_period); - if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) { - int allowed; - -#ifdef __LITTLE_ENDIAN - allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0; -#else - allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0; -#endif - if (!allowed) + switch (off) { + case bpf_ctx_range(struct bpf_perf_event_data, sample_period): + bpf_ctx_record_field_size(info, size_sp); + if (!bpf_ctx_narrow_access_ok(off, size, size_sp)) return false; - info->ctx_field_size = 8; - info->converted_op_size = 8; - } else { + break; + default: if (size != sizeof(long)) return false; } + return true; } static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct bpf_perf_event_data, sample_period): - BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, data), si->dst_reg, si->src_reg, offsetof(struct bpf_perf_event_data_kern, data)); *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, - offsetof(struct perf_sample_data, period)); + bpf_target_off(struct perf_sample_data, period, 8, + target_size)); break; default: *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, diff --git a/lib/cmdline.c b/lib/cmdline.c index 3c6432df7e63..4c0888c4a68d 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -23,14 +23,14 @@ * the values[M, M+1, ..., N] into the ints array in get_options. */ -static int get_range(char **str, int *pint) +static int get_range(char **str, int *pint, int n) { int x, inc_counter, upper_range; (*str)++; upper_range = simple_strtol((*str), NULL, 0); inc_counter = upper_range - *pint; - for (x = *pint; x < upper_range; x++) + for (x = *pint; n && x < upper_range; x++, n--) *pint++ = x; return inc_counter; } @@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints) break; if (res == 3) { int range_nums; - range_nums = get_range((char **)&str, ints + i); + range_nums = get_range((char **)&str, ints + i, nints - i); if (range_nums < 0) break; /* diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 945fd1ca49b5..df4ebdb2b10a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, spin_unlock(ptl); free_page_and_swap_cache(src_page); } - cond_resched(); } } diff --git a/mm/slub.c b/mm/slub.c index 7449593fca72..8addc535bcdc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s) return name; } +static void sysfs_slab_remove_workfn(struct work_struct *work) +{ + struct kmem_cache *s = + container_of(work, struct kmem_cache, kobj_remove_work); + + if (!s->kobj.state_in_sysfs) + /* + * For a memcg cache, this may be called during + * deactivation and again on shutdown. Remove only once. + * A cache is never shut down before deactivation is + * complete, so no need to worry about synchronization. + */ + return; + +#ifdef CONFIG_MEMCG + kset_unregister(s->memcg_kset); +#endif + kobject_uevent(&s->kobj, KOBJ_REMOVE); + kobject_del(&s->kobj); + kobject_put(&s->kobj); +} + static int sysfs_slab_add(struct kmem_cache *s) { int err; @@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s) struct kset *kset = cache_kset(s); int unmergeable = slab_unmergeable(s); + INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn); + if (!kset) { kobject_init(&s->kobj, &slab_ktype); return 0; @@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s) */ return; - if (!s->kobj.state_in_sysfs) - /* - * For a memcg cache, this may be called during - * deactivation and again on shutdown. Remove only once. - * A cache is never shut down before deactivation is - * complete, so no need to worry about synchronization. - */ - return; - -#ifdef CONFIG_MEMCG - kset_unregister(s->memcg_kset); -#endif - kobject_uevent(&s->kobj, KOBJ_REMOVE); - kobject_del(&s->kobj); + kobject_get(&s->kobj); + schedule_work(&s->kobj_remove_work); } void sysfs_slab_release(struct kmem_cache *s) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 34a1c3e46ed7..ecc97f74ab18 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) if (p4d_none(*p4d)) return NULL; pud = pud_offset(p4d, addr); - if (pud_none(*pud)) + + /* + * Don't dereference bad PUD or PMD (below) entries. This will also + * identify huge mappings, which we may encounter on architectures + * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be + * identified as vmalloc addresses by is_vmalloc_addr(), but are + * not [unambiguously] associated with a struct page, so there is + * no correct value to return for them. + */ + WARN_ON_ONCE(pud_bad(*pud)); + if (pud_none(*pud) || pud_bad(*pud)) return NULL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + WARN_ON_ONCE(pmd_bad(*pmd)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) return NULL; ptep = pte_offset_map(pmd, addr); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index fca84e111c89..4e111196f902 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); - atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = atmvcc->atm_options; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/net/atm/clip.c b/net/atm/clip.c index a7e4018370b4..f271a7bcf5b2 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -137,11 +137,11 @@ static int neigh_check_cb(struct neighbour *n) if (entry->vccs || time_before(jiffies, entry->expires)) return 0; - if (atomic_read(&n->refcnt) > 1) { + if (refcount_read(&n->refcnt) > 1) { struct sk_buff *skb; pr_debug("destruction postponed with ref %d\n", - atomic_read(&n->refcnt)); + refcount_read(&n->refcnt)); while ((skb = skb_dequeue(&n->arp_queue)) != NULL) dev_kfree_skb(skb); @@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, memcpy(here, llc_oui, sizeof(llc_oui)); ((__be16 *) here)[3] = skb->protocol; } - atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = vcc->atm_options; entry->vccs->last_use = jiffies; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); @@ -767,7 +767,7 @@ static void atmarp_info(struct seq_file *seq, struct neighbour *n, seq_printf(seq, "(resolving)\n"); else seq_printf(seq, "(expired, ref %d)\n", - atomic_read(&entry->neigh->refcnt)); + refcount_read(&entry->neigh->refcnt)); } else if (!svc) { seq_printf(seq, "%d.%d.%d\n", clip_vcc->vcc->dev->number, diff --git a/net/atm/common.c b/net/atm/common.c index f06422f4108d..8a4f99114cd2 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk) printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n", __func__, atomic_read(&sk->sk_rmem_alloc)); - if (atomic_read(&sk->sk_wmem_alloc)) + if (refcount_read(&sk->sk_wmem_alloc)) printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n", - __func__, atomic_read(&sk->sk_wmem_alloc)); + __func__, refcount_read(&sk->sk_wmem_alloc)); } static void vcc_def_wakeup(struct sock *sk) @@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk) struct atm_vcc *vcc = atm_sk(sk); return (vcc->qos.txtp.max_sdu + - atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf; + refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf; } static void vcc_write_space(struct sock *sk) @@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ - atomic_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_rmem_alloc, 0); vcc->push = NULL; vcc->pop = NULL; @@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) goto out; } pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb->dev = NULL; /* for paths shared with net_device interfaces */ ATM_SKB(skb)->atm_options = vcc->atm_options; diff --git a/net/atm/lec.c b/net/atm/lec.c index 09cfe87f0a44..75545717fa46 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->vcc = vcc; ATM_SKB(skb)->atm_options = vcc->atm_options; - atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (vcc->send(vcc, skb) < 0) { dev->stats.tx_dropped++; return; @@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) int i; char *tmp; /* FIXME */ - atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); mesg = (struct atmlec_msg *)skb->data; tmp = skb->data; tmp += sizeof(struct atmlec_msg); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index a190800572bd..680a4b9095a1 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) sizeof(struct llc_snap_hdr)); } - atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = entry->shortcut->atm_options; entry->shortcut->send(entry->shortcut, skb); entry->packets_fwded++; @@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb) struct mpoa_client *mpc = find_mpc_by_vcc(vcc); struct k_message *mesg = (struct k_message *)skb->data; - atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); if (mpc == NULL) { pr_info("no mpc found\n"); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index c4e09846d1de..21d9d341a619 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) return 1; } - atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); + refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); diff --git a/net/atm/proc.c b/net/atm/proc.c index bbb6461a4b7f..27c9c01c537d 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -211,7 +211,7 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc) vcc->flags, sk->sk_err, sk_wmem_alloc_get(sk), sk->sk_sndbuf, sk_rmem_alloc_get(sk), sk->sk_rcvbuf, - atomic_read(&sk->sk_refcnt)); + refcount_read(&sk->sk_refcnt)); } static void svc_info(struct seq_file *seq, struct atm_vcc *vcc) diff --git a/net/atm/raw.c b/net/atm/raw.c index 2e17e97a7a8b..821c0797553d 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("(%d) %d -= %d\n", vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index f640a99e14b8..983c3a21a133 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) struct sock *sk; msg = (struct atmsvc_msg *) skb->data; - atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc)); vcc = *(struct atm_vcc **) &msg->vcc; pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc); sk = sk_atm(vcc); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8a8f77a247e6..91e3ba280706 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu", sk, - atomic_read(&sk->sk_refcnt), + refcount_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), from_kuid(seq_user_ns(seq), sock_i_uid(sk)), diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 9a40013da915..7b3965861013 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -481,16 +481,16 @@ static int bnep_session(void *arg) struct net_device *dev = s->dev; struct sock *sk = s->sock->sk; struct sk_buff *skb; - wait_queue_t wait; + DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG(""); set_user_nice(current, -15); - init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&s->terminate)) break; @@ -512,9 +512,8 @@ static int bnep_session(void *arg) break; netif_wake_queue(dev); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ @@ -663,7 +662,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) s = __bnep_get_session(req->dst); if (s) { atomic_inc(&s->terminate); - wake_up_process(s->task); + wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index f4c64ef01c24..7f26a5a19ff6 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -280,16 +280,16 @@ static int cmtp_session(void *arg) struct cmtp_session *session = arg; struct sock *sk = session->sock->sk; struct sk_buff *skb; - wait_queue_t wait; + DEFINE_WAIT_FUNC(wait, woken_wake_function); BT_DBG("session %p", session); set_user_nice(current, -15); - init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -306,9 +306,8 @@ static int cmtp_session(void *arg) cmtp_process_transmit(session); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); @@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) err = cmtp_attach_device(session); if (err < 0) { atomic_inc(&session->terminate); - wake_up_process(session->task); + wake_up_interruptible(sk_sleep(session->sock->sk)); up_write(&cmtp_session_sem); return err; } @@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) /* Stop session thread */ atomic_inc(&session->terminate); - wake_up_process(session->task); + + /* Ensure session->terminate is updated */ + smp_mb__after_atomic(); + + wake_up_interruptible(sk_sleep(session->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d860e3cc23cf..6bc679cd3481 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3096,15 +3096,14 @@ int hci_register_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1, hdev->name); + hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name); if (!hdev->workqueue) { error = -ENOMEM; goto err; } - hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | - WQ_MEM_RECLAIM, 1, hdev->name); + hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, + hdev->name); if (!hdev->req_workqueue) { destroy_workqueue(hdev->workqueue); error = -ENOMEM; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 961f7f53e178..472b3907b1b0 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -36,6 +36,7 @@ #define VERSION "1.2" static DECLARE_RWSEM(hidp_session_sem); +static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq); static LIST_HEAD(hidp_session_list); static unsigned char hidp_keycode[256] = { @@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session) * Wake up session thread and notify it to stop. This is asynchronous and * returns immediately. Call this whenever a runtime error occurs and you want * the session to stop. - * Note: wake_up_process() performs any necessary memory-barriers for us. + * Note: wake_up_interruptible() performs any necessary memory-barriers for us. */ static void hidp_session_terminate(struct hidp_session *session) { atomic_inc(&session->terminate); - wake_up_process(session->task); + wake_up_interruptible(&hidp_session_wq); } /* @@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; struct sk_buff *skb; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&hidp_session_wq, &wait); for (;;) { /* * This thread can be woken up two ways: @@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session) * session->terminate flag and wakes this thread up. * - Via modifying the socket state of ctrl/intr_sock. This * thread is woken up by ->sk_state_changed(). - * - * Note: set_current_state() performs any necessary - * memory-barriers for us. */ - set_current_state(TASK_INTERRUPTIBLE); + /* Ensure session->terminate is updated */ + smp_mb__before_atomic(); if (atomic_read(&session->terminate)) break; @@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session) hidp_process_transmit(session, &session->ctrl_transmit, session->ctrl_sock); - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } + remove_wait_queue(&hidp_session_wq, &wait); atomic_inc(&session->terminate); - set_current_state(TASK_RUNNING); + + /* Ensure session->terminate is updated */ + smp_mb__after_atomic(); +} + +static int hidp_session_wake_function(wait_queue_t *wait, + unsigned int mode, + int sync, void *key) +{ + wake_up_interruptible(&hidp_session_wq); + return false; } /* @@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session) static int hidp_session_thread(void *arg) { struct hidp_session *session = arg; - wait_queue_t ctrl_wait, intr_wait; + DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function); + DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function); BT_DBG("session %p", session); @@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg) set_user_nice(current, -15); hidp_set_timer(session); - init_waitqueue_entry(&ctrl_wait, current); - init_waitqueue_entry(&intr_wait, current); add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait); add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); /* This memory barrier is paired with wq_has_sleeper(). See diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 507b80d59dec..67a8642f57ea 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -87,7 +87,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) BT_DBG("sk %p", sk); - if (!addr || addr->sa_family != AF_BLUETOOTH) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&la, 0, sizeof(la)); @@ -181,7 +182,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p", sk); - if (!addr || alen < sizeof(addr->sa_family) || + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ac3c650cb234..1aaccf637479 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -197,7 +197,7 @@ static void rfcomm_sock_kill(struct sock *sk) if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; - BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt)); + BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* Kill poor orphan */ bt_sock_unlink(&rfcomm_sk_list, sk); @@ -339,7 +339,8 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr struct sock *sk = sock->sk; int len, err = 0; - if (!addr || addr->sa_family != AF_BLUETOOTH) + if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&sa, 0, sizeof(sa)); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 728e0c8dc8e7..795e920a3281 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -524,10 +524,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - if (addr_len < sizeof(struct sockaddr_sco)) + if (!addr || addr_len < sizeof(struct sockaddr_sco) || + addr->sa_family != AF_BLUETOOTH) return -EINVAL; lock_sock(sk); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 067cf0313449..2261e5194c82 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -149,12 +149,12 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; - if (atomic_read(&nf_bridge->use) > 1) { + if (refcount_read(&nf_bridge->use) > 1) { struct nf_bridge_info *tmp = nf_bridge_alloc(skb); if (tmp) { memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); - atomic_set(&tmp->use, 1); + refcount_set(&tmp->use, 1); } nf_bridge_put(nf_bridge); nf_bridge = tmp; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0b5dd607444c..723f25eed8ea 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -865,7 +865,7 @@ static struct attribute *bridge_attrs[] = { NULL }; -static struct attribute_group bridge_group = { +static const struct attribute_group bridge_group = { .name = SYSFS_BRIDGE_ATTR, .attrs = bridge_attrs, }; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index e0bb624c3845..dfc86a0199da 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -61,7 +61,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; - if (INVALID_TARGET) + if (ebt_invalid_target(info->target)) return -EINVAL; return 0; } diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 66697cbd0a8b..19f0f9592d32 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -44,7 +44,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par) tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; - if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + if (ebt_invalid_target(tmp)) return -EINVAL; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 8d2a85e0594e..a7223eaf490b 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -47,7 +47,7 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) (strcmp(par->table, "broute") != 0 || hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; - if (INVALID_TARGET) + if (ebt_invalid_target(info->target)) return -EINVAL; return 0; } diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index e56ccd060d26..11cf9e9e9222 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -51,7 +51,7 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par) if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; - if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + if (ebt_invalid_target(tmp)) return -EINVAL; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 7506b853a84d..632d5a416d97 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = { static void caif_sock_destructor(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - caif_assert(!atomic_read(&sk->sk_wmem_alloc)); + caif_assert(!refcount_read(&sk->sk_wmem_alloc)); caif_assert(sk_unhashed(sk)); caif_assert(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { diff --git a/net/core/datagram.c b/net/core/datagram.c index e5311a7c70da..454ec8923333 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, } } *peeked = 1; - atomic_inc(&skb->users); + refcount_inc(&skb->users); } else { __skb_unlink(skb, queue); if (destructor) @@ -358,7 +358,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, spin_lock_bh(&sk_queue->lock); if (skb == skb_peek(sk_queue)) { __skb_unlink(skb, sk_queue); - atomic_dec(&skb->users); + refcount_dec(&skb->users); if (destructor) destructor(sk, skb); err = 0; @@ -614,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - atomic_add(truesize, &skb->sk->sk_wmem_alloc); + refcount_add(truesize, &skb->sk->sk_wmem_alloc); while (copied) { int size = min_t(int, copied, PAGE_SIZE - start); skb_fill_page_desc(skb, frag++, pages[n], start, size); diff --git a/net/core/dev.c b/net/core/dev.c index a91572aa73d5..7098fba52be1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1862,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb, { if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) return -ENOMEM; - atomic_inc(&skb->users); + refcount_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } @@ -2484,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) if (unlikely(!skb)) return; - if (likely(atomic_read(&skb->users) == 1)) { + if (likely(refcount_read(&skb->users) == 1)) { smp_rmb(); - atomic_set(&skb->users, 0); - } else if (likely(!atomic_dec_and_test(&skb->users))) { + refcount_set(&skb->users, 0); + } else if (likely(!refcount_dec_and_test(&skb->users))) { return; } get_kfree_skb_cb(skb)->reason = reason; @@ -3955,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) clist = clist->next; - WARN_ON(atomic_read(&skb->users)); + WARN_ON(refcount_read(&skb->users)); if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) trace_consume_skb(skb); else @@ -4844,6 +4844,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type) } EXPORT_SYMBOL(gro_find_complete_by_type); +static void napi_skb_free_stolen_head(struct sk_buff *skb) +{ + skb_dst_drop(skb); + secpath_reset(skb); + kmem_cache_free(skbuff_head_cache, skb); +} + static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { @@ -4857,13 +4864,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { - skb_dst_drop(skb); - secpath_reset(skb); - kmem_cache_free(skbuff_head_cache, skb); - } else { + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else __kfree_skb(skb); - } break; case GRO_HELD: @@ -4935,10 +4939,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_DROP: - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + case GRO_MERGED_FREE: + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + napi_skb_free_stolen_head(skb); + else + napi_reuse_skb(napi, skb); + break; + case GRO_MERGED: case GRO_CONSUMED: break; @@ -7825,7 +7835,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, { #if BITS_PER_LONG == 64 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*stats64)); + memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + sizeof(*netdev_stats), 0, sizeof(*stats64) - sizeof(*netdev_stats)); @@ -7867,9 +7877,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, } else { netdev_stats_to_stats64(storage, &dev->stats); } - storage->rx_dropped += atomic_long_read(&dev->rx_dropped); - storage->tx_dropped += atomic_long_read(&dev->tx_dropped); - storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler); + storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped); + storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler); return storage; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3bba291c6c32..a0093e1b0235 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -46,7 +46,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, if (r == NULL) return -ENOMEM; - atomic_set(&r->refcnt, 1); + refcount_set(&r->refcnt, 1); r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; @@ -283,7 +283,7 @@ jumped: if (err != -EAGAIN) { if ((arg->flags & FIB_LOOKUP_NOREF) || - likely(atomic_inc_not_zero(&rule->refcnt))) { + likely(refcount_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } @@ -517,7 +517,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, last = r; } - fib_rule_get(rule); + refcount_set(&rule->refcnt, 1); if (last) list_add_rcu(&rule->list, &last->list); diff --git a/net/core/filter.c b/net/core/filter.c index b39c869d22e3..94169572d002 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -54,6 +54,7 @@ #include <net/dst.h> #include <net/sock_reuseport.h> #include <net/busy_poll.h> +#include <net/tcp.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -2011,7 +2012,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_proto_4_to_6(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); - u32 off = skb->network_header - skb->mac_header; + u32 off = skb_mac_header_len(skb); int ret; ret = skb_cow(skb, len_diff); @@ -2047,7 +2048,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) static int bpf_skb_proto_6_to_4(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); - u32 off = skb->network_header - skb->mac_header; + u32 off = skb_mac_header_len(skb); int ret; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2153,6 +2154,124 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = { .arg2_type = ARG_ANYTHING, }; +static u32 bpf_skb_net_base_len(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + return sizeof(struct iphdr); + case htons(ETH_P_IPV6): + return sizeof(struct ipv6hdr); + default: + return ~0U; + } +} + +static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) +{ + u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + int ret; + + ret = skb_cow(skb, len_diff); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_push(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* Due to header grow, MSS needs to be downgraded. */ + skb_shinfo(skb)->gso_size -= len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + return 0; +} + +static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) +{ + u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + int ret; + + ret = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_pop(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* Due to header shrink, MSS can be upgraded. */ + skb_shinfo(skb)->gso_size += len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + return 0; +} + +static u32 __bpf_skb_max_len(const struct sk_buff *skb) +{ + return skb->dev->mtu + skb->dev->hard_header_len; +} + +static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) +{ + bool trans_same = skb->transport_header == skb->network_header; + u32 len_cur, len_diff_abs = abs(len_diff); + u32 len_min = bpf_skb_net_base_len(skb); + u32 len_max = __bpf_skb_max_len(skb); + __be16 proto = skb->protocol; + bool shrink = len_diff < 0; + int ret; + + if (unlikely(len_diff_abs > 0xfffU)) + return -EFAULT; + if (unlikely(proto != htons(ETH_P_IP) && + proto != htons(ETH_P_IPV6))) + return -ENOTSUPP; + + len_cur = skb->len - skb_network_offset(skb); + if (skb_transport_header_was_set(skb) && !trans_same) + len_cur = skb_network_header_len(skb); + if ((shrink && (len_diff_abs >= len_cur || + len_cur - len_diff_abs < len_min)) || + (!shrink && (skb->len + len_diff_abs > len_max && + !skb_is_gso(skb)))) + return -ENOTSUPP; + + ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : + bpf_skb_net_grow(skb, len_diff_abs); + + bpf_compute_data_end(skb); + return 0; +} + +BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, + u32, mode, u64, flags) +{ + if (unlikely(flags)) + return -EINVAL; + if (likely(mode == BPF_ADJ_ROOM_NET)) + return bpf_skb_adjust_net(skb, len_diff); + + return -ENOTSUPP; +} + +static const struct bpf_func_proto bpf_skb_adjust_room_proto = { + .func = bpf_skb_adjust_room, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + static u32 __bpf_skb_min_len(const struct sk_buff *skb) { u32 min_len = skb_network_offset(skb); @@ -2165,11 +2284,6 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb) return min_len; } -static u32 __bpf_skb_max_len(const struct sk_buff *skb) -{ - return skb->dev->mtu + skb->dev->hard_header_len; -} - static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) { unsigned int old_len = skb->len; @@ -2306,6 +2420,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_change_proto || func == bpf_skb_change_head || func == bpf_skb_change_tail || + func == bpf_skb_adjust_room || func == bpf_skb_pull_data || func == bpf_clone_redirect || func == bpf_l3_csum_replace || @@ -2672,6 +2787,109 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + struct sock *sk = bpf_sock->sk; + int ret = 0; + int val; + + if (!sk_fullsock(sk)) + return -EINVAL; + + if (level == SOL_SOCKET) { + if (optlen != sizeof(int)) + return -EINVAL; + val = *((int *)optval); + + /* Only some socketops are supported */ + switch (optname) { + case SO_RCVBUF: + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + break; + case SO_SNDBUF: + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); + break; + case SO_MAX_PACING_RATE: + sk->sk_max_pacing_rate = val; + sk->sk_pacing_rate = min(sk->sk_pacing_rate, + sk->sk_max_pacing_rate); + break; + case SO_PRIORITY: + sk->sk_priority = val; + break; + case SO_RCVLOWAT: + if (val < 0) + val = INT_MAX; + sk->sk_rcvlowat = val ? : 1; + break; + case SO_MARK: + sk->sk_mark = val; + break; + default: + ret = -EINVAL; + } +#ifdef CONFIG_INET + } else if (level == SOL_TCP && + sk->sk_prot->setsockopt == tcp_setsockopt) { + if (optname == TCP_CONGESTION) { + char name[TCP_CA_NAME_MAX]; + + strncpy(name, optval, min_t(long, optlen, + TCP_CA_NAME_MAX-1)); + name[TCP_CA_NAME_MAX-1] = 0; + ret = tcp_set_congestion_control(sk, name, false); + if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) + /* replacing an existing ca */ + tcp_reinit_congestion_control(sk, + inet_csk(sk)->icsk_ca_ops); + } else { + struct tcp_sock *tp = tcp_sk(sk); + + if (optlen != sizeof(int)) + return -EINVAL; + + val = *((int *)optval); + /* Only some options are supported */ + switch (optname) { + case TCP_BPF_IW: + if (val <= 0 || tp->data_segs_out > 0) + ret = -EINVAL; + else + tp->snd_cwnd = val; + break; + case TCP_BPF_SNDCWND_CLAMP: + if (val <= 0) { + ret = -EINVAL; + } else { + tp->snd_cwnd_clamp = val; + tp->snd_ssthresh = val; + } + default: + ret = -EINVAL; + } + } + ret = -EINVAL; +#endif + } else { + ret = -EINVAL; + } + return ret; +} + +static const struct bpf_func_proto bpf_setsockopt_proto = { + .func = bpf_setsockopt, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2745,6 +2963,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: return &bpf_skb_change_type_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_get_tunnel_key: @@ -2823,6 +3043,17 @@ lwt_inout_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * + sock_ops_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_setsockopt_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto * lwt_xmit_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -2857,38 +3088,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } -static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info) +static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) { - info->ctx_field_size = 4; - switch (off) { - case offsetof(struct __sk_buff, pkt_type) ... - offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, vlan_present) ... - offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1: - info->converted_op_size = 1; - break; - case offsetof(struct __sk_buff, queue_mapping) ... - offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, protocol) ... - offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, vlan_tci) ... - offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, vlan_proto) ... - offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_index) ... - offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_classid) ... - offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: - info->converted_op_size = 2; - break; - default: - info->converted_op_size = 4; - } -} + const int size_default = sizeof(__u32); -static bool __is_valid_access(int off, int size, enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -2897,40 +3101,24 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type, return false; switch (off) { - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: - if (off + size > - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32)) + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + if (off + size > offsetofend(struct __sk_buff, cb[4])) return false; break; - case offsetof(struct __sk_buff, data) ... - offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: - if (size != sizeof(__u32)) + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): + if (size != size_default) return false; - info->reg_type = PTR_TO_PACKET; - break; - case offsetof(struct __sk_buff, data_end) ... - offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: - if (size != sizeof(__u32)) - return false; - info->reg_type = PTR_TO_PACKET_END; break; default: + /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { - if (size != sizeof(__u32)) + if (size != size_default) return false; } else { - int allowed; - - /* permit narrower load for not cb/data/data_end fields */ -#ifdef __LITTLE_ENDIAN - allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0; -#else - allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0; -#endif - if (!allowed) + bpf_ctx_record_field_size(info, size_default); + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; - __set_access_aux_info(off, info); } } @@ -2942,26 +3130,22 @@ static bool sk_filter_is_valid_access(int off, int size, struct bpf_insn_access_aux *info) { switch (off) { - case offsetof(struct __sk_buff, tc_classid) ... - offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, data) ... - offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, data_end) ... - offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): return false; } if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size, type, info); + return bpf_skb_is_valid_access(off, size, type, info); } static bool lwt_is_valid_access(int off, int size, @@ -2969,24 +3153,31 @@ static bool lwt_is_valid_access(int off, int size, struct bpf_insn_access_aux *info) { switch (off) { - case offsetof(struct __sk_buff, tc_classid) ... - offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, tc_classid): return false; } if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, mark): - case offsetof(struct __sk_buff, priority): - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size, type, info); + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, info); } static bool sock_filter_is_valid_access(int off, int size, @@ -3058,19 +3249,27 @@ static bool tc_cls_act_is_valid_access(int off, int size, { if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, mark): - case offsetof(struct __sk_buff, tc_index): - case offsetof(struct __sk_buff, priority): - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, tc_index): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size, type, info); + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, info); } static bool __is_valid_xdp_access(int off, int size) @@ -3110,101 +3309,141 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +static bool __is_valid_sock_ops_access(int off, int size) +{ + if (off < 0 || off >= sizeof(struct bpf_sock_ops)) + return false; + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + if (size != sizeof(__u32)) + return false; + + return true; +} + +static bool sock_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct bpf_sock_ops, op) ... + offsetof(struct bpf_sock_ops, replylong[3]): + break; + default: + return false; + } + } + + return __is_valid_sock_ops_access(off, size); +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; switch (si->off) { case offsetof(struct __sk_buff, len): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, len)); + bpf_target_off(struct sk_buff, len, 4, + target_size)); break; case offsetof(struct __sk_buff, protocol): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, protocol)); + bpf_target_off(struct sk_buff, protocol, 2, + target_size)); break; case offsetof(struct __sk_buff, vlan_proto): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, vlan_proto)); + bpf_target_off(struct sk_buff, vlan_proto, 2, + target_size)); break; case offsetof(struct __sk_buff, priority): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, priority)); + bpf_target_off(struct sk_buff, priority, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, priority)); + bpf_target_off(struct sk_buff, priority, 4, + target_size)); break; case offsetof(struct __sk_buff, ingress_ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, skb_iif)); + bpf_target_off(struct sk_buff, skb_iif, 4, + target_size)); break; case offsetof(struct __sk_buff, ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, - offsetof(struct net_device, ifindex)); + bpf_target_off(struct net_device, ifindex, 4, + target_size)); break; case offsetof(struct __sk_buff, hash): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, hash)); + bpf_target_off(struct sk_buff, hash, 4, + target_size)); break; case offsetof(struct __sk_buff, mark): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, mark)); + bpf_target_off(struct sk_buff, mark, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, mark)); + bpf_target_off(struct sk_buff, mark, 4, + target_size)); break; case offsetof(struct __sk_buff, pkt_type): - return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg, - si->src_reg, insn); + *target_size = 1; + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, + PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); +#endif + break; case offsetof(struct __sk_buff, queue_mapping): - return convert_skb_access(SKF_AD_QUEUE, si->dst_reg, - si->src_reg, insn); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, queue_mapping, 2, + target_size)); + break; case offsetof(struct __sk_buff, vlan_present): - return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, - si->dst_reg, si->src_reg, insn); - case offsetof(struct __sk_buff, vlan_tci): - return convert_skb_access(SKF_AD_VLAN_TAG, - si->dst_reg, si->src_reg, insn); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, vlan_tci, 2, + target_size)); + if (si->off == offsetof(struct __sk_buff, vlan_tci)) { + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, + ~VLAN_TAG_PRESENT); + } else { + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); + } + break; case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + offsetofend(struct __sk_buff, cb[4]) - 1: BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); BUILD_BUG_ON((offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data)) % @@ -3230,6 +3469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off -= offsetof(struct __sk_buff, tc_classid); off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, tc_classid); + *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, off); @@ -3255,14 +3495,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, tc_index)); + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, tc_index)); + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); #else if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); @@ -3273,10 +3513,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, napi_id): #if defined(CONFIG_NET_RX_BUSY_POLL) - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, napi_id)); + bpf_target_off(struct sk_buff, napi_id, 4, + target_size)); *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else @@ -3291,7 +3530,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; @@ -3335,22 +3574,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct __sk_buff, ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, - offsetof(struct net_device, ifindex)); + bpf_target_off(struct net_device, ifindex, 4, + target_size)); break; default: - return bpf_convert_ctx_access(type, si, insn_buf, prog); + return bpf_convert_ctx_access(type, si, insn_buf, prog, + target_size); } return insn - insn_buf; @@ -3359,7 +3598,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, static u32 xdp_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; @@ -3379,6 +3618,139 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + int off; + + switch (si->off) { + case offsetof(struct bpf_sock_ops, op) ... + offsetof(struct bpf_sock_ops, replylong[3]): + BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) != + FIELD_SIZEOF(struct bpf_sock_ops_kern, op)); + BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) != + FIELD_SIZEOF(struct bpf_sock_ops_kern, reply)); + BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) != + FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong)); + off = si->off; + off -= offsetof(struct bpf_sock_ops, op); + off += offsetof(struct bpf_sock_ops_kern, op); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + off); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + off); + break; + + case offsetof(struct bpf_sock_ops, family): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_family)); + break; + + case offsetof(struct bpf_sock_ops, remote_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_daddr)); + break; + + case offsetof(struct bpf_sock_ops, local_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_rcv_saddr)); + break; + + case offsetof(struct bpf_sock_ops, remote_ip6[0]) ... + offsetof(struct bpf_sock_ops, remote_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_daddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct bpf_sock_ops, remote_ip6[0]); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_daddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct bpf_sock_ops, local_ip6[0]) ... + offsetof(struct bpf_sock_ops, local_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct bpf_sock_ops, local_ip6[0]); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct bpf_sock_ops, remote_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_dport)); +#ifndef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); +#endif + break; + + case offsetof(struct bpf_sock_ops, local_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_num)); + break; + } + return insn - insn_buf; +} + const struct bpf_verifier_ops sk_filter_prog_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -3428,6 +3800,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = { .convert_ctx_access = sock_filter_convert_ctx_access, }; +const struct bpf_verifier_ops sock_ops_prog_ops = { + .get_func_proto = sock_ops_func_proto, + .is_valid_access = sock_ops_is_valid_access, + .convert_ctx_access = sock_ops_convert_ctx_access, +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index dadb5eef91c3..e31fc11a8000 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -124,7 +124,7 @@ static bool neigh_del(struct neighbour *n, __u8 state, bool retval = false; write_lock(&n->lock); - if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) { + if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) { struct neighbour *neigh; neigh = rcu_dereference_protected(n->next, @@ -254,7 +254,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) neigh_del_timer(n); n->dead = 1; - if (atomic_read(&n->refcnt) != 1) { + if (refcount_read(&n->refcnt) != 1) { /* The most unpleasant situation. We must destroy neighbour entry, but someone still uses it. @@ -335,7 +335,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; - atomic_set(&n->refcnt, 1); + refcount_set(&n->refcnt, 1); n->dead = 1; out: return n; @@ -444,7 +444,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, rcu_read_lock_bh(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { - if (!atomic_inc_not_zero(&n->refcnt)) + if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); } @@ -473,7 +473,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, n = rcu_dereference_bh(n->next)) { if (!memcmp(n->primary_key, pkey, key_len) && net_eq(dev_net(n->dev), net)) { - if (!atomic_inc_not_zero(&n->refcnt)) + if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ -709,7 +709,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms); static inline void neigh_parms_put(struct neigh_parms *parms) { - if (atomic_dec_and_test(&parms->refcnt)) + if (refcount_dec_and_test(&parms->refcnt)) neigh_parms_destroy(parms); } @@ -821,7 +821,7 @@ static void neigh_periodic_work(struct work_struct *work) if (time_before(n->used, n->confirmed)) n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 1 && + if (refcount_read(&n->refcnt) == 1 && (state == NUD_FAILED || time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { *np = n->next; @@ -1479,7 +1479,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; - atomic_set(&p->refcnt, 1); + refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); dev_hold(dev); @@ -1542,7 +1542,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) INIT_LIST_HEAD(&tbl->parms_list); list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); - atomic_set(&tbl->parms.refcnt, 1); + refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); @@ -1796,7 +1796,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) if ((parms->dev && nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || - nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || + nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) || nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || /* approximative value for deprecated QUEUE_LEN (in packets) */ @@ -2234,7 +2234,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, ci.ndm_used = jiffies_to_clock_t(now - neigh->used); ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); - ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; + ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 58e6cc70500d..b4f9922b6f23 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -626,7 +626,7 @@ static struct attribute *netstat_attrs[] = { }; -static struct attribute_group netstat_group = { +static const struct attribute_group netstat_group = { .name = "statistics", .attrs = netstat_attrs, }; @@ -636,7 +636,7 @@ static struct attribute *wireless_attrs[] = { NULL }; -static struct attribute_group wireless_group = { +static const struct attribute_group wireless_group = { .name = "wireless", .attrs = wireless_attrs, }; @@ -1204,7 +1204,7 @@ static struct attribute *dql_attrs[] = { NULL }; -static struct attribute_group dql_group = { +static const struct attribute_group dql_group = { .name = "byte_queue_limits", .attrs = dql_attrs, }; @@ -1448,7 +1448,7 @@ static void *net_grab_current_ns(void) struct net *ns = current->nsproxy->net_ns; #ifdef CONFIG_NET_NS if (ns) - atomic_inc(&ns->passive); + refcount_inc(&ns->passive); #endif return ns; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2178db8e47cd..8726d051f31d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -284,7 +284,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); - atomic_set(&net->passive, 1); + refcount_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); @@ -380,7 +380,7 @@ static void net_free(struct net *net) void net_drop_ns(void *p) { struct net *ns = p; - if (ns && atomic_dec_and_test(&ns->passive)) + if (ns && refcount_dec_and_test(&ns->passive)) net_free(ns); } @@ -501,6 +501,23 @@ static void cleanup_net(struct work_struct *work) net_drop_ns(net); } } + +/** + * net_ns_barrier - wait until concurrent net_cleanup_work is done + * + * cleanup_net runs from work queue and will first remove namespaces + * from the global list, then run net exit functions. + * + * Call this in module exit path to make sure that all netns + * ->exit ops have been invoked before the function is removed. + */ +void net_ns_barrier(void) +{ + mutex_lock(&net_mutex); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL(net_ns_barrier); + static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 37c1e34ddd85..d3408a693166 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -277,7 +277,7 @@ static void zap_completion_queue(void) struct sk_buff *skb = clist; clist = clist->next; if (!skb_irq_freeable(skb)) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); dev_kfree_skb_any(skb); /* put this one back */ } else { __kfree_skb(skb); @@ -309,7 +309,7 @@ repeat: return NULL; } - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb_reserve(skb, reserve); return skb; } @@ -632,7 +632,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); - atomic_set(&npinfo->refcnt, 1); + refcount_set(&npinfo->refcnt, 1); ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { @@ -642,7 +642,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) } } else { npinfo = rtnl_dereference(ndev->npinfo); - atomic_inc(&npinfo->refcnt); + refcount_inc(&npinfo->refcnt); } npinfo->netpoll = np; @@ -821,7 +821,7 @@ void __netpoll_cleanup(struct netpoll *np) synchronize_srcu(&netpoll_srcu); - if (atomic_dec_and_test(&npinfo->refcnt)) { + if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; ops = np->dev->netdev_ops; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2dd42c5b0366..6e1e10ff433a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3363,7 +3363,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { ktime_t idle_start = ktime_get(); - while (atomic_read(&(pkt_dev->skb->users)) != 1) { + while (refcount_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) break; @@ -3420,7 +3420,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { skb = pkt_dev->skb; skb->protocol = eth_type_trans(skb, skb->dev); - atomic_add(burst, &skb->users); + refcount_add(burst, &skb->users); local_bh_disable(); do { ret = netif_receive_skb(skb); @@ -3428,11 +3428,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->errors++; pkt_dev->sofar++; pkt_dev->seq_num++; - if (atomic_read(&skb->users) != burst) { + if (refcount_read(&skb->users) != burst) { /* skb was queued by rps/rfs or taps, * so cannot reuse this skb */ - atomic_sub(burst - 1, &skb->users); + WARN_ON(refcount_sub_and_test(burst - 1, &skb->users)); /* get out of the loop and wait * until skb is consumed */ @@ -3446,7 +3446,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { local_bh_disable(); - atomic_inc(&pkt_dev->skb->users); + refcount_inc(&pkt_dev->skb->users); ret = dev_queue_xmit(pkt_dev->skb); switch (ret) { @@ -3487,7 +3487,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; goto unlock; } - atomic_add(burst, &pkt_dev->skb->users); + refcount_add(burst, &pkt_dev->skb->users); xmit_more: ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0); @@ -3513,11 +3513,11 @@ xmit_more: /* fallthru */ case NETDEV_TX_BUSY: /* Retry it next time */ - atomic_dec(&(pkt_dev->skb->users)); + refcount_dec(&(pkt_dev->skb->users)); pkt_dev->last_ok = 0; } if (unlikely(burst)) - atomic_sub(burst, &pkt_dev->skb->users); + WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users)); unlock: HARD_TX_UNLOCK(odev, txq); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ed51de525a88..d1ba90980be1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -649,7 +649,7 @@ int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int g NETLINK_CB(skb).dst_group = group; if (echo) - atomic_inc(&skb->users); + refcount_inc(&skb->users); netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); if (echo) err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f75897a33fa4..8b11341ed69a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->head = NULL; skb->truesize = sizeof(struct sk_buff); - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb->mac_header = (typeof(skb->mac_header))~0U; out: @@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, /* Account for allocated memory : skb + skb->head */ skb->truesize = SKB_TRUESIZE(size); skb->pfmemalloc = pfmemalloc; - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); @@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, kmemcheck_annotate_bitfield(&fclones->skb2, flags1); skb->fclone = SKB_FCLONE_ORIG; - atomic_set(&fclones->fclone_ref, 1); + refcount_set(&fclones->fclone_ref, 1); fclones->skb2.fclone = SKB_FCLONE_CLONE; } @@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - atomic_set(&skb->users, 1); + refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); @@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb) * This test would have no chance to be true for the clone, * while here, branch prediction will be good. */ - if (atomic_read(&fclones->fclone_ref) == 1) + if (refcount_read(&fclones->fclone_ref) == 1) goto fastpath; break; @@ -637,7 +637,7 @@ static void kfree_skbmem(struct sk_buff *skb) fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } - if (!atomic_dec_and_test(&fclones->fclone_ref)) + if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: kmem_cache_free(skbuff_fclone_cache, fclones); @@ -915,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(head_frag); C(data); C(truesize); - atomic_set(&n->users, 1); + refcount_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; @@ -1027,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && - atomic_read(&fclones->fclone_ref) == 1) { + refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; - atomic_set(&fclones->fclone_ref, 2); + refcount_set(&fclones->fclone_ref, 2); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; @@ -3024,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, get_page(pfrag->page); skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); skb->len += copy; skb->data_len += copy; offset += copy; @@ -3844,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb) struct sock *sk = skb->sk; struct sk_buff *clone; - if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; clone = skb_clone(skb, GFP_ATOMIC); @@ -3915,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ - if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); @@ -3997,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ - if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { err = sock_queue_err_skb(sk, skb); sock_put(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index 6f4b090241c1..ba0ef6a7dbaf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1528,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) get_net(net); sock_net_set(sk, net); - atomic_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); @@ -1552,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_destruct(sk); filter = rcu_dereference_check(sk->sk_filter, - atomic_read(&sk->sk_wmem_alloc) == 0); + refcount_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); @@ -1602,7 +1602,7 @@ void sk_free(struct sock *sk) * some packets are still in some tx queue. * If not null, sock_wfree() will call __sk_free(sk) later */ - if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + if (refcount_dec_and_test(&sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sk_free); @@ -1659,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ - atomic_set(&newsk->sk_wmem_alloc, 1); + refcount_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); sk_init_common(newsk); @@ -1708,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&newsk->sk_refcnt, 2); + refcount_set(&newsk->sk_refcnt, 2); /* * Increment the counter in the same struct proto as the master @@ -1787,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb) * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call */ - atomic_sub(len - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); sk->sk_write_space(sk); len = 1; } @@ -1795,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb) * if sk_wmem_alloc reaches 0, we must finish what sk_free() * could not do because of in-flight packets */ - if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1807,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) __sk_free(sk); } @@ -1829,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) * is enough to guarantee sk_free() wont free this sock until * all in-flight packets are completed */ - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(skb_set_owner_w); @@ -1851,8 +1851,8 @@ void skb_orphan_partial(struct sk_buff *skb) ) { struct sock *sk = skb->sk; - if (atomic_inc_not_zero(&sk->sk_refcnt)) { - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); skb->destructor = sock_efree; } } else { @@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { - if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); @@ -1987,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) + if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) break; @@ -2310,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) return 1; - } else if (atomic_read(&sk->sk_wmem_alloc) < + } else if (refcount_read(&sk->sk_wmem_alloc) < prot->sysctl_wmem[0]) return 1; } @@ -2577,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | @@ -2687,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&sk->sk_refcnt, 1); + refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4fccc0c37fbd..c376af5bfdfb 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -353,7 +353,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); ireq->pktopts = skb; } ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index eeb5fc561f80..21dedf6fd0f7 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -559,7 +559,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq, (dn->flags&DN_NDFLAG_R2) ? "2" : "-", (dn->flags&DN_NDFLAG_P3) ? "3" : "-", dn->n.nud_state, - atomic_read(&dn->n.refcnt), + refcount_read(&dn->n.refcnt), dn->blksize, (dn->n.dev) ? dn->n.dev->name : "?"); read_unlock(&n->lock); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 58925b6597de..76c2077c3f5b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index ae206163c273..c2044775ae7d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -265,7 +265,7 @@ static int cipso_v4_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - atomic_inc(&entry->lsm_data->refcount); + refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CIPSOV4; @@ -332,7 +332,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, } entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); - atomic_inc(&secattr->cache->refcount); + refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a7dd088d5fc9..38d9af9b917c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -252,7 +252,7 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Reference in_dev->dev */ dev_hold(dev); /* Account for reference dev->ip_ptr (below) */ - in_dev_hold(in_dev); + refcount_set(&in_dev->refcnt, 1); err = devinet_sysctl_register(in_dev); if (err) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1f18b4650253..0cbee0a666ff 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c4032302d7cd..28f14afd0dd3 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -173,7 +173,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, static void ip_ma_put(struct ip_mc_list *im) { - if (atomic_dec_and_test(&im->refcnt)) { + if (refcount_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); kfree_rcu(im, rcu); } @@ -199,7 +199,7 @@ static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) - atomic_dec(&im->refcnt); + refcount_dec(&im->refcnt); im->tm_running = 0; im->reporter = 0; im->unsolicit_count = 0; @@ -213,7 +213,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) im->tm_running = 1; if (!mod_timer(&im->timer, jiffies+tv+2)) - atomic_inc(&im->refcnt); + refcount_inc(&im->refcnt); } static void igmp_gq_start_timer(struct in_device *in_dev) @@ -249,7 +249,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) spin_unlock_bh(&im->lock); return; } - atomic_dec(&im->refcnt); + refcount_dec(&im->refcnt); } igmp_start_timer(im, max_delay); spin_unlock_bh(&im->lock); @@ -1374,7 +1374,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; im->sfcount[MCAST_EXCLUDE] = 1; - atomic_set(&im->refcnt, 1); + refcount_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a3fa1a5b6d98..4089c013cb03 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -756,7 +756,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, * are committed to memory and refcnt initialized. */ smp_wmb(); - atomic_set(&req->rsk_refcnt, 2 + 1); + refcount_set(&req->rsk_refcnt, 2 + 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b5e9317eaf9e..96e95e83cc61 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -276,11 +276,11 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) { if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); + refcount_dec(&fq->refcnt); if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); - atomic_dec(&fq->refcnt); + refcount_dec(&fq->refcnt); } } EXPORT_SYMBOL(inet_frag_kill); @@ -329,7 +329,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, */ hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { - atomic_inc(&qp->refcnt); + refcount_inc(&qp->refcnt); spin_unlock(&hb->chain_lock); qp_in->flags |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); @@ -339,9 +339,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, #endif qp = qp_in; if (!mod_timer(&qp->timer, jiffies + nf->timeout)) - atomic_inc(&qp->refcnt); + refcount_inc(&qp->refcnt); - atomic_inc(&qp->refcnt); + refcount_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); spin_unlock(&hb->chain_lock); @@ -370,7 +370,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); - atomic_set(&q->refcnt, 1); + refcount_set(&q->refcnt, 1); return q; } @@ -405,7 +405,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, spin_lock(&hb->chain_lock); hlist_for_each_entry(q, &hb->chain, list) { if (q->net == nf && f->match(q, key)) { - atomic_inc(&q->refcnt); + refcount_inc(&q->refcnt); spin_unlock(&hb->chain_lock); return q; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e9a59d2d91d4..2e3389d614d1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -43,7 +43,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr, /* This function handles inet_sock, but also timewait and request sockets * for IPv4/IPv6. */ -u32 sk_ehashfn(const struct sock *sk) +static u32 sk_ehashfn(const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6 && @@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* All sockets share common refcount, but have different destructors */ void sock_gen_put(struct sock *sk) { - if (!atomic_dec_and_test(&sk->sk_refcnt)) + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (sk->sk_state == TCP_TIME_WAIT) @@ -287,7 +287,7 @@ begin: continue; if (likely(INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index f8aff2c71cde..5b039159e67a 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -76,7 +76,7 @@ void inet_twsk_free(struct inet_timewait_sock *tw) void inet_twsk_put(struct inet_timewait_sock *tw) { - if (atomic_dec_and_test(&tw->tw_refcnt)) + if (refcount_dec_and_test(&tw->tw_refcnt)) inet_twsk_free(tw); } EXPORT_SYMBOL_GPL(inet_twsk_put); @@ -131,7 +131,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * We can use atomic_set() because prior spin_lock()/spin_unlock() * committed into memory all tw fields. */ - atomic_set(&tw->tw_refcnt, 4); + refcount_set(&tw->tw_refcnt, 4); inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ @@ -195,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, * to a non null value before everything is setup for this * timewait socket. */ - atomic_set(&tw->tw_refcnt, 0); + refcount_set(&tw->tw_refcnt, 0); __module_get(tw->tw_prot->owner); } @@ -278,7 +278,7 @@ restart: atomic_read(&twsk_net(tw)->count)) continue; - if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) continue; if (unlikely((tw->tw_family != family) || diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 86fa45809540..c5a117cc6619 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -115,7 +115,7 @@ static void inetpeer_gc_worker(struct work_struct *work) n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - if (!atomic_read(&p->refcnt)) { + if (refcount_read(&p->refcnt) == 1) { list_del(&p->gc_list); kmem_cache_free(peer_cachep, p); } @@ -202,10 +202,11 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, int cmp = inetpeer_addr_cmp(daddr, &u->daddr); if (cmp == 0) { /* Before taking a reference, check if this entry was - * deleted (refcnt=-1) + * deleted (refcnt=0) */ - if (!atomic_add_unless(&u->refcnt, 1, -1)) + if (!refcount_inc_not_zero(&u->refcnt)) { u = NULL; + } return u; } if (cmp == -1) @@ -382,11 +383,10 @@ static int inet_peer_gc(struct inet_peer_base *base, while (stackptr > stack) { stackptr--; p = rcu_deref_locked(**stackptr, base); - if (atomic_read(&p->refcnt) == 0) { + if (refcount_read(&p->refcnt) == 1) { smp_rmb(); delta = (__u32)jiffies - p->dtime; - if (delta >= ttl && - atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { + if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) { p->gc_next = gchead; gchead = p; } @@ -432,7 +432,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, relookup: p = lookup(daddr, stack, base); if (p != peer_avl_empty) { - atomic_inc(&p->refcnt); + refcount_inc(&p->refcnt); write_sequnlock_bh(&base->lock); return p; } @@ -444,7 +444,7 @@ relookup: p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { p->daddr = *daddr; - atomic_set(&p->refcnt, 1); + refcount_set(&p->refcnt, 2); atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; @@ -468,7 +468,7 @@ void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; smp_mb__before_atomic(); - atomic_dec(&p->refcnt); + refcount_dec(&p->refcnt); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b3cdeec85f1f..9a8cfac503dc 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -312,7 +312,7 @@ static int ip_frag_reinit(struct ipq *qp) unsigned int sum_truesize = 0; if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { - atomic_inc(&qp->q.refcnt); + refcount_inc(&qp->q.refcnt); return -ETIMEDOUT; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7a3fd25e8913..2e61e2af251a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && + if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || + (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { @@ -1036,7 +1037,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, @@ -1144,7 +1145,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; @@ -1368,7 +1369,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->len += len; skb->data_len += len; skb->truesize += len; - atomic_add(len, &sk->sk_wmem_alloc); + refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a1d521be612b..bb909f1d7537 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2406,6 +2406,67 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } +static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[RTA_MAX + 1]; + struct sk_buff *skb = NULL; + struct mfc_cache *cache; + struct mr_table *mrt; + struct rtmsg *rtm; + __be32 src, grp; + u32 tableid; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; + grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; + tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; + + mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); + goto errout_free; + } + + /* entries are added/deleted only under RTNL */ + rcu_read_lock(); + cache = ipmr_cache_find(mrt, src, grp); + rcu_read_unlock(); + if (!cache) { + err = -ENOENT; + goto errout_free; + } + + skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout_free; + } + + err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, cache, + RTM_NEWROUTE, 0); + if (err < 0) + goto errout_free; + + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + +errout: + return err; + +errout_free: + kfree_skb(skb); + goto errout; +} + static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -3053,7 +3114,7 @@ int __init ip_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - NULL, ipmr_rtm_dumproute, NULL); + ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, ipmr_rtm_route, NULL, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 038f293c2376..7d72decb80f9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -47,7 +47,7 @@ struct clusterip_config { __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ - struct net_device *dev; /* device */ + int ifindex; /* device ifindex */ u_int16_t num_total_nodes; /* total number of nodes */ unsigned long local_nodes; /* node number array */ @@ -57,6 +57,9 @@ struct clusterip_config { enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ struct rcu_head rcu; + + char ifname[IFNAMSIZ]; /* device ifname */ + struct notifier_block notifier; /* refresh c->ifindex in it */ }; #ifdef CONFIG_PROC_FS @@ -98,9 +101,8 @@ clusterip_config_put(struct clusterip_config *c) * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ static inline void -clusterip_config_entry_put(struct clusterip_config *c) +clusterip_config_entry_put(struct net *net, struct clusterip_config *c) { - struct net *net = dev_net(c->dev); struct clusterip_net *cn = net_generic(net, clusterip_net_id); local_bh_disable(); @@ -109,8 +111,7 @@ clusterip_config_entry_put(struct clusterip_config *c) spin_unlock(&cn->lock); local_bh_enable(); - dev_mc_del(c->dev, c->clustermac); - dev_put(c->dev); + unregister_netdevice_notifier(&c->notifier); /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, @@ -170,19 +171,55 @@ clusterip_config_init_nodelist(struct clusterip_config *c, set_bit(i->local_nodes[n] - 1, &c->local_nodes); } -static struct clusterip_config * -clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, - struct net_device *dev) +static int +clusterip_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) { - struct net *net = dev_net(dev); + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct clusterip_config *c; + + c = container_of(this, struct clusterip_config, notifier); + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } else if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + } + + return NOTIFY_DONE; +} + +static struct clusterip_config * +clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, + __be32 ip, const char *iniface) +{ struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_config *c; + int err; c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return ERR_PTR(-ENOMEM); - c->dev = dev; + strcpy(c->ifname, iniface); + c->ifindex = -1; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; @@ -213,17 +250,27 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { - spin_lock_bh(&cn->lock); - list_del_rcu(&c->list); - spin_unlock_bh(&cn->lock); - kfree(c); - - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto err; } } #endif - return c; + c->notifier.notifier_call = clusterip_netdev_event; + err = register_netdevice_notifier(&c->notifier); + if (!err) + return c; + +#ifdef CONFIG_PROC_FS + proc_remove(c->pde); +err: +#endif + spin_lock_bh(&cn->lock); + list_del_rcu(&c->list); + spin_unlock_bh(&cn->lock); + kfree(c); + + return ERR_PTR(err); } #ifdef CONFIG_PROC_FS @@ -425,14 +472,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) e->ip.iniface); return -ENOENT; } + dev_put(dev); - config = clusterip_config_init(cipinfo, - e->ip.dst.s_addr, dev); - if (IS_ERR(config)) { - dev_put(dev); + config = clusterip_config_init(par->net, cipinfo, + e->ip.dst.s_addr, + e->ip.iniface); + if (IS_ERR(config)) return PTR_ERR(config); - } - dev_mc_add(config->dev, config->clustermac); } } cipinfo->config = config; @@ -458,7 +504,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ - clusterip_config_entry_put(cipinfo->config); + clusterip_config_entry_put(par->net, cipinfo->config); clusterip_config_put(cipinfo->config); @@ -558,10 +604,9 @@ arp_mangle(void *priv, * addresses on different interfacs. However, in the CLUSTERIP case * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ - if (c->dev != state->out) { - pr_debug("not mangling arp reply on different " - "interface: cip'%s'-skb'%s'\n", - c->dev->name, state->out->name); + if (c->ifindex != state->out->ifindex) { + pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n", + c->ifindex, state->out->ifindex); clusterip_config_put(c); return NF_ACCEPT; } @@ -743,14 +788,20 @@ static const struct file_operations clusterip_proc_fops = { static int clusterip_net_init(struct net *net) { struct clusterip_net *cn = net_generic(net, clusterip_net_id); + int ret; INIT_LIST_HEAD(&cn->configs); spin_lock_init(&cn->lock); + ret = nf_register_net_hook(net, &cip_arp_ops); + if (ret < 0) + return ret; + #ifdef CONFIG_PROC_FS cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); if (!cn->procdir) { + nf_unregister_net_hook(net, &cip_arp_ops); pr_err("Unable to proc dir entry\n"); return -ENOMEM; } @@ -765,6 +816,7 @@ static void clusterip_net_exit(struct net *net) struct clusterip_net *cn = net_generic(net, clusterip_net_id); proc_remove(cn->procdir); #endif + nf_unregister_net_hook(net, &cip_arp_ops); } static struct pernet_operations clusterip_net_ops = { @@ -786,17 +838,11 @@ static int __init clusterip_tg_init(void) if (ret < 0) goto cleanup_subsys; - ret = nf_register_hook(&cip_arp_ops); - if (ret < 0) - goto cleanup_target; - pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); return 0; -cleanup_target: - xt_unregister_target(&clusterip_tg_reg); cleanup_subsys: unregister_pernet_subsys(&clusterip_net_ops); return ret; @@ -806,7 +852,6 @@ static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); - nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); unregister_pernet_subsys(&clusterip_net_ops); diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index dc1dea15c1b4..f39037fca923 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -98,8 +98,8 @@ static int masq_device_event(struct notifier_block *this, */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_ct_iterate_cleanup_net(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); } return NOTIFY_DONE; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index ccfbce13a633..b8f0db54b197 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -290,7 +290,7 @@ void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); - pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter); + pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt)); sk_common_release(sk); } @@ -1127,7 +1127,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bdffad875691..b0bb5d0a30bd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1063,7 +1063,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 0, 0L, 0, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int raw_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 7835bb4a1fab..0905cf04c2a4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, NULL, &own_req); if (child) { - atomic_set(&req->rsk_refcnt, 1); + refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 058f509ca98e..71ce33decd97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, return skb->len < size_goal && sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && - atomic_read(&sk->sk_wmem_alloc) > skb->truesize; + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } static void tcp_push(struct sock *sk, int flags, int mss_now, @@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, /* It is possible TX completion already happened * before we set TSQ_THROTTLED. */ - if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize) return; } @@ -2350,6 +2350,8 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + dst_release(sk->sk_rx_dst); + sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); /* Clean up fastopen related fields */ @@ -2479,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name); + err = tcp_set_congestion_control(sk, name, true); release_sock(sk); return err; } @@ -3062,6 +3064,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; len = min_t(unsigned int, len, TCP_ULP_NAME_MAX); + if (!icsk->icsk_ulp_ops) { + if (put_user(0, optlen)) + return -EFAULT; + return 0; + } if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len)) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 324c9bcc5456..fde983f6376b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk) INET_ECN_dontxmit(sk); } -static void tcp_reinit_congestion_control(struct sock *sk, - const struct tcp_congestion_ops *ca) +void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -333,8 +333,12 @@ out: return ret; } -/* Change congestion control for socket */ -int tcp_set_congestion_control(struct sock *sk, const char *name) +/* Change congestion control for socket. If load is false, then it is the + * responsibility of the caller to call tcp_init_congestion_control or + * tcp_reinit_congestion_control (if the current congestion control was + * already initialized. + */ +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return -EPERM; rcu_read_lock(); - ca = __tcp_ca_find_autoload(name); + if (!load) + ca = tcp_ca_find(name); + else + ca = __tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } - if (!ca) + if (!ca) { err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) + } else if (!load) { + icsk->icsk_ca_ops = ca; + if (!try_module_get(ca->owner)) + err = -EBUSY; + } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || + ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { err = -EPERM; - else if (!try_module_get(ca->owner)) + } else if (!try_module_get(ca->owner)) { err = -EBUSY; - else + } else { tcp_reinit_congestion_control(sk, ca); + } out: rcu_read_unlock(); return err; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4af82b914dd4..ce9c7fef200f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -214,13 +214,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); - atomic_set(&req->rsk_refcnt, 2); + refcount_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); tcp_init_metrics(child); + tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_buffer_space(child); tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2ab7e2fa9bb9..2920e0cb09f8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5571,7 +5571,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); - + tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on first data @@ -5977,6 +5977,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else { /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); + tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tcp_init_congestion_control(sk); tcp_mtup_init(sk); @@ -6190,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req, ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || - (ecn_ok_dst & DST_FEATURE_ECN_CA)) + (ecn_ok_dst & DST_FEATURE_ECN_CA) || + tcp_bpf_ca_needs_ecn((struct sock *)req)) inet_rsk(req)->ecn_ok = 1; } @@ -6406,7 +6408,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } else { tcp_rsk(req)->tfo_listener = false; if (!want_cookie) - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_hash_add(sk, req, + tcp_timeout_init((struct sock *)req)); af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : TCP_SYNACK_COOKIE); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d774bcd9a54b..6ec6900eb300 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2323,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -2349,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } #define TMPSZ 150 diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d30ee31e94eb..0ff83c1637d8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, int full_space = tcp_full_space(sk_listener); u32 window_clamp; __u8 rcv_wscale; + u32 rcv_wnd; int mss; mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); @@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req, (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; + rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req); + if (rcv_wnd == 0) + rcv_wnd = dst_metric(dst, RTAX_INITRWND); + else if (full_space < rcv_wnd * mss) + full_space = rcv_wnd * mss; + /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), @@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); + rcv_wnd); ireq->rcv_wscale = rcv_wscale; } EXPORT_SYMBOL(tcp_openreq_init_rwin); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea86..11f69bbf9307 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); sum_truesize += skb->truesize; - atomic_add(sum_truesize - gso_skb->truesize, + refcount_add(sum_truesize - gso_skb->truesize, &skb->sk->sk_wmem_alloc); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a9c395b6235..4d36f0b093e6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; - else if (tcp_ca_needs_ecn(sk)) + else if (tcp_ca_needs_ecn(sk) || + tcp_bpf_ca_needs_ecn(sk)) INET_ECN_xmit(sk); } @@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || - tcp_ca_needs_ecn(sk); + tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { const struct dst_entry *dst = __sk_dst_get(sk); @@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); } } @@ -861,12 +863,11 @@ void tcp_wfree(struct sk_buff *skb) struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nval, oval; - int wmem; /* Keep one reference on sk_wmem_alloc. * Will be released by sk_free() from here or tcp_tasklet_func() */ - wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc)); /* If this softirq is serviced by ksoftirqd, we are likely under stress. * Wait until our queues (qdisc + devices) are drained. @@ -875,7 +876,7 @@ void tcp_wfree(struct sk_buff *skb) * - chance for incoming ACK (processed by another cpu maybe) * to migrate this flow (skb->ooo_okay will be eventually set) */ - if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { @@ -925,7 +926,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) if (nval != oval) continue; - if (!atomic_inc_not_zero(&sk->sk_wmem_alloc)) + if (!refcount_inc_not_zero(&sk->sk_wmem_alloc)) break; /* queue this socket to tasklet queue */ tsq = this_cpu_ptr(&tsq_tasklet); @@ -1045,7 +1046,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb->sk = sk; skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); @@ -2176,7 +2177,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); limit <<= factor; - if (atomic_read(&sk->sk_wmem_alloc) > limit) { + if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send the 1st or 2nd skb in write queue. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. @@ -2192,7 +2193,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, * test again the condition. */ smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) + if (refcount_read(&sk->sk_wmem_alloc) > limit) return true; } return false; @@ -2812,7 +2813,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. */ - if (atomic_read(&sk->sk_wmem_alloc) > + if (refcount_read(&sk->sk_wmem_alloc) > min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; @@ -3267,6 +3268,7 @@ static void tcp_connect_init(struct sock *sk) const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; + u32 rcv_wnd; /* We'll fix this up when we get a response from the other end. * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. @@ -3300,13 +3302,17 @@ static void tcp_connect_init(struct sock *sk) (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) tp->window_clamp = tcp_full_space(sk); + rcv_wnd = tcp_rwnd_init_bpf(sk); + if (rcv_wnd == 0) + rcv_wnd = dst_metric(dst, RTAX_INITRWND); + tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, sock_net(sk)->ipv4.sysctl_tcp_window_scaling, &rcv_wscale, - dst_metric(dst, RTAX_INITRWND)); + rcv_wnd); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; @@ -3327,7 +3333,7 @@ static void tcp_connect_init(struct sock *sk) tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); } @@ -3440,6 +3446,7 @@ int tcp_connect(struct sock *sk) struct sk_buff *buff; int err; + tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); tcp_connect_init(sk); if (unlikely(tp->repair)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 47c7aa0501af..25294d43e147 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -577,7 +577,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -1163,24 +1163,7 @@ out: return ret; } -/* Copy as much information as possible into skb->dev_scratch to avoid - * possibly multiple cache miss on dequeue(); - */ #if BITS_PER_LONG == 64 - -/* we can store multiple info here: truesize, len and the bit needed to - * compute skb_csum_unnecessary will be on cold cache lines at recvmsg - * time. - * skb->len can be stored on 16 bits since the udp header has been already - * validated and pulled. - */ -struct udp_dev_scratch { - u32 truesize; - u16 len; - bool is_linear; - bool csum_unnecessary; -}; - static void udp_set_dev_scratch(struct sk_buff *skb) { struct udp_dev_scratch *scratch; @@ -1197,22 +1180,6 @@ static int udp_skb_truesize(struct sk_buff *skb) { return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; } - -static unsigned int udp_skb_len(struct sk_buff *skb) -{ - return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; -} - -static bool udp_skb_csum_unnecessary(struct sk_buff *skb) -{ - return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; -} - -static bool udp_skb_is_linear(struct sk_buff *skb) -{ - return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; -} - #else static void udp_set_dev_scratch(struct sk_buff *skb) { @@ -1223,21 +1190,6 @@ static int udp_skb_truesize(struct sk_buff *skb) { return skb->dev_scratch; } - -static unsigned int udp_skb_len(struct sk_buff *skb) -{ - return skb->len; -} - -static bool udp_skb_csum_unnecessary(struct sk_buff *skb) -{ - return skb_csum_unnecessary(skb); -} - -static bool udp_skb_is_linear(struct sk_buff *skb) -{ - return !skb_is_nonlinear(skb); -} #endif /* fully reclaim rmem/fwd memory allocated for skb */ @@ -1598,18 +1550,6 @@ busy_check: } EXPORT_SYMBOL_GPL(__skb_recv_udp); -static int copy_linear_skb(struct sk_buff *skb, int len, int off, - struct iov_iter *to) -{ - int n, copy = len - off; - - n = copy_to_iter(skb->data + off, copy, to); - if (n == copy) - return 0; - - return -EFAULT; -} - /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -2302,7 +2242,7 @@ void udp_v4_early_demux(struct sk_buff *skb) uh->source, iph->saddr, dif); } - if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return; skb->sk = sk; @@ -2751,7 +2691,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 9a89c10a55f0..4515836d2a3a 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -55,7 +55,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_dport, req->id.idiag_if, tbl, NULL); #endif - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); err = -ENOENT; @@ -206,7 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, return -EINVAL; } - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; rcu_read_unlock(); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a885ffcf0973..114fb64cf176 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1927,15 +1927,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ifp->flags |= IFA_F_DADFAILED; - if (ifp->flags&IFA_F_PERMANENT) { - spin_lock_bh(&ifp->lock); - addrconf_del_dad_work(ifp); - ifp->flags |= IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - if (dad_failed) - ipv6_ifa_notify(0, ifp); - in6_ifa_put(ifp); - } else if (ifp->flags&IFA_F_TEMPORARY) { + if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); ifpub = ifp->ifpub; @@ -1948,6 +1940,14 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); + } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) { + spin_lock_bh(&ifp->lock); + addrconf_del_dad_work(ifp); + ifp->flags |= IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); } else { ipv6_del_addr(ifp); } @@ -3384,6 +3384,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); + struct net *net = dev_net(dev); int run_pending = 0; int err; @@ -3399,7 +3400,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEMTU: /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) { - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); break; } @@ -3515,7 +3516,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, dev != net->loopback_dev); } break; diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 8d772fea1dde..44067521e7cd 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -227,7 +227,7 @@ static int calipso_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - atomic_inc(&entry->lsm_data->refcount); + refcount_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; secattr->type = NETLBL_NLTYPE_CALIPSO; @@ -296,7 +296,7 @@ static int calipso_cache_add(const unsigned char *calipso_ptr, } entry->key_len = calipso_ptr_len; entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len); - atomic_inc(&secattr->cache->refcount); + refcount_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e011122ebd43..a1f918713006 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -250,8 +250,14 @@ ipv4_connected: */ err = ip6_datagram_dst_update(sk, true); - if (err) + if (err) { + /* Reset daddr and dport so that udp_v6_early_demux() + * fails to find this socket + */ + memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); + inet->inet_dport = 0; goto out; + } sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); @@ -1035,6 +1041,6 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 71faffdd55d9..9ed35473dcb5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d950d43ba255..f02f131f6435 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -30,6 +30,25 @@ #include <net/ipv6.h> #include <linux/icmpv6.h> +static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen) +{ + int off = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr; + + if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP)) + return offsetof(struct ipv6hdr, nexthdr); + + while (off < nhlen) { + exthdr = (void *)ipv6_hdr + off; + if (exthdr->nexthdr == NEXTHDR_ESP) + return off; + + off += ipv6_optlen(exthdr); + } + + return 0; +} + static struct sk_buff **esp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, struct xfrm_state *x; __be32 seq; __be32 spi; + int nhoff; int err; skb_pull(skb, offset); @@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, xo->flags |= XFRM_GRO; + nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset); + if (!nhoff) + goto out; + + IP6CB(skb)->nhoff = nhoff; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d0900918a19e..b13b8f93079d 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -75,7 +75,7 @@ begin: continue; if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) continue; - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { @@ -172,7 +172,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5baa6fab4b97..1422d6c08377 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1385,7 +1385,7 @@ emsgsize: */ cork->length += length; - if ((((length + fragheaderlen) > mtu) || + if ((((length + (skb ? skb->len : headersize)) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && @@ -1472,7 +1472,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len, 1, @@ -1581,7 +1581,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 2297c9f073ba..d7b679037bae 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -75,8 +75,8 @@ static int masq_device_event(struct notifier_block *this, struct net *net = dev_net(dev); if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_ct_iterate_cleanup_net(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); return NOTIFY_DONE; } @@ -99,7 +99,7 @@ static void iterate_cleanup_work(struct work_struct *work) w = container_of(work, struct masq_dev_work, work); index = w->ifindex; - nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0); + nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0); put_net(w->net); kfree(w); @@ -110,12 +110,12 @@ static void iterate_cleanup_work(struct work_struct *work) /* ipv6 inet notifier is an atomic notifier, i.e. we cannot * schedule. * - * Unfortunately, nf_ct_iterate_cleanup can run for a long + * Unfortunately, nf_ct_iterate_cleanup_net can run for a long * time if there are lots of conntracks and the system * handles high softirq load, so it frequently calls cond_resched * while iterating the conntrack table. * - * So we defer nf_ct_iterate_cleanup walk to the system workqueue. + * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue. * * As we can have 'a lot' of inet_events (depending on amount * of ipv6 addresses being deleted), we also need to add an upper diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2e4490076061..0488a24c2a44 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3730,7 +3730,11 @@ static int ip6_route_dev_notify(struct notifier_block *this, net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif - } else if (event == NETDEV_UNREGISTER) { + } else if (event == NETDEV_UNREGISTER && + dev->reg_state != NETREG_UNREGISTERED) { + /* NETDEV_UNREGISTER could be fired for multiple times by + * netdev_wait_allrefs(). Make sure we only call this once. + */ in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e9958b1398cb..ac912bb21747 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? - kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : + kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : NULL; rcu_read_lock(); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2f7e99af67db..7b75b0620730 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -194,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); ireq->pktopts = skb; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f85cbfc183d6..2521690d62d6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -734,7 +734,7 @@ static void tcp_v6_init_req(struct request_sock *req, np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || np->repflow)) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); ireq->pktopts = skb; } } @@ -1809,7 +1809,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, + refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -1842,7 +1842,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } static int tcp6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d1d728805729..4a3e65626e8b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -325,7 +325,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -362,7 +362,7 @@ try_again: if (!skb) return err; - ulen = skb->len; + ulen = udp_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; @@ -379,14 +379,18 @@ try_again: if (copied < ulen || peeking || (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { - checksum_valid = !udp_lib_checksum_complete(skb); + checksum_valid = udp_skb_csum_unnecessary(skb) || + !__udp_lib_checksum_complete(skb); if (!checksum_valid) goto csum_copy_err; } - if (checksum_valid || skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, off, msg, copied); - else { + if (checksum_valid || udp_skb_csum_unnecessary(skb)) { + if (udp_skb_is_linear(skb)) + err = copy_linear_skb(skb, copied, off, &msg->msg_iter); + else + err = skb_copy_datagram_msg(skb, off, msg, copied); + } else { err = skb_copy_and_csum_datagram_msg(skb, off, msg); if (err == -EINVAL) goto csum_copy_err; @@ -881,7 +885,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, struct sock *sk; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) + if (sk->sk_state == TCP_ESTABLISHED && + INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) return sk; /* Only check first socket in chain */ break; @@ -911,7 +916,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) else return; - if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return; skb->sk = sk; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 08a807b29298..3ef5d913e7a3 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return 1; #endif - ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ac033e413bc5..148533169b1d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -402,7 +402,7 @@ static void iucv_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); } diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index bf75c9231cca..c343ac60bf50 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, psock->sk->sk_receive_queue.qlen, atomic_read(&psock->sk->sk_rmem_alloc), psock->sk->sk_write_queue.qlen, - atomic_read(&psock->sk->sk_wmem_alloc)); + refcount_read(&psock->sk->sk_wmem_alloc)); if (psock->done) seq_puts(seq, "Done "); diff --git a/net/key/af_key.c b/net/key/af_key.c index ce9b8565d825..edcf1d0f82c8 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); atomic_dec(&net_pfkey->socks_nr); } @@ -203,11 +203,11 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, sock_hold(sk); if (*skb2 == NULL) { - if (atomic_read(&skb->users) != 1) { + if (refcount_read(&skb->users) != 1) { *skb2 = skb_clone(skb, allocation); } else { *skb2 = skb; - atomic_inc(&skb->users); + refcount_inc(&skb->users); } } if (*skb2 != NULL) { @@ -1150,6 +1150,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } + err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; @@ -1161,8 +1162,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); - if (!x->aalg) + if (!x->aalg) { + err = -ENOMEM; goto out; + } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { @@ -1181,8 +1184,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); - if (!x->calg) + if (!x->calg) { + err = -ENOMEM; goto out; + } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { @@ -1196,8 +1201,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); - if (!x->ealg) + if (!x->ealg) { + err = -ENOMEM; goto out; + } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { @@ -1242,8 +1249,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, struct xfrm_encap_tmpl *natt; x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); - if (!x->encap) + if (!x->encap) { + err = -ENOMEM; goto out; + } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; @@ -2742,6 +2751,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); + if (!err) + xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ @@ -3728,7 +3739,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index d100aed3d06f..98a005d0d04a 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -144,9 +144,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" : ""); seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count, - tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, + tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, atomic_long_read(&tunnel->stats.tx_packets), diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 9b02c13d258b..5e91b47f0d2a 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -507,7 +507,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_estab_match(sap, daddr, laddr, rc))) { @@ -566,7 +566,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_listener_match(sap, laddr, rc))) { @@ -973,9 +973,9 @@ void llc_sk_free(struct sock *sk) skb_queue_purge(&sk->sk_write_queue); skb_queue_purge(&llc->pdu_unack_q); #ifdef LLC_REFCNT_DEBUG - if (atomic_read(&sk->sk_refcnt) != 1) { + if (refcount_read(&sk->sk_refcnt) != 1) { printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n", - sk, __func__, atomic_read(&sk->sk_refcnt)); + sk, __func__, refcount_read(&sk->sk_refcnt)); printk(KERN_DEBUG "%d LLC sockets are still alive\n", atomic_read(&llc_sock_nr)); } else { diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 63b6ab056370..d90928f50226 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -329,7 +329,7 @@ again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ - if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc))) { diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c9b78e7b342f..913380919301 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,10 +70,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o -nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o -nf_tables-objs += nft_lookup.o nft_dynset.o +nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \ + nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \ + nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index ba6a5516dc7c..e495b5e484b1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -841,14 +841,16 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static int ip_set_create(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; @@ -989,7 +991,8 @@ ip_set_destroy_set(struct ip_set *set) static int ip_set_destroy(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; @@ -1067,7 +1070,8 @@ ip_set_flush_set(struct ip_set *set) static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; @@ -1106,7 +1110,8 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { static int ip_set_rename(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *s; @@ -1155,7 +1160,8 @@ out: static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *from, *to; @@ -1428,7 +1434,8 @@ out: static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1513,7 +1520,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; @@ -1567,7 +1575,8 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; @@ -1621,7 +1630,8 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; @@ -1656,7 +1666,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, static int ip_set_header(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); const struct ip_set *set; @@ -1712,7 +1723,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1770,7 +1782,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { static int ip_set_protocol(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; struct nlmsghdr *nlh2; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 42c3e3ba1b94..3f09cdb42562 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -38,8 +38,8 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, break; } case IPPROTO_SCTP: { - sctp_sctphdr_t _sh; - const sctp_sctphdr_t *sh; + struct sctphdr _sh; + const struct sctphdr *sh; sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); if (!sh) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index ad99c1ceea6f..e31956b58aba 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1037,9 +1037,9 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, */ static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) { - sctp_chunkhdr_t *sch, schunk; - sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t), - sizeof(schunk), &schunk); + struct sctp_chunkhdr *sch, schunk; + sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr), + sizeof(schunk), &schunk); if (sch == NULL) return 0; if (sch->type == SCTP_CID_ABORT) @@ -1070,9 +1070,9 @@ static inline bool is_new_conn(const struct sk_buff *skb, return th->syn; } case IPPROTO_SCTP: { - sctp_chunkhdr_t *sch, schunk; + struct sctp_chunkhdr *sch, schunk; - sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), + sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr), sizeof(schunk), &schunk); if (sch == NULL) return false; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 56f8e4b204ff..3ffad4adaddf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -15,16 +15,15 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_service *svc; - sctp_chunkhdr_t _schunkh, *sch; - sctp_sctphdr_t *sh, _sctph; + struct sctp_chunkhdr _schunkh, *sch; + struct sctphdr *sh, _sctph; __be16 _ports[2], *ports = NULL; if (likely(!ip_vs_iph_icmp(iph))) { sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); if (sh) { - sch = skb_header_pointer( - skb, iph->len + sizeof(sctp_sctphdr_t), - sizeof(_schunkh), &_schunkh); + sch = skb_header_pointer(skb, iph->len + sizeof(_sctph), + sizeof(_schunkh), &_schunkh); if (sch && (sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs))) ports = &sh->source; @@ -77,7 +76,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, return 1; } -static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, +static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph, unsigned int sctphoff) { sctph->checksum = sctp_compute_cksum(skb, sctphoff); @@ -88,7 +87,7 @@ static int sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { - sctp_sctphdr_t *sctph; + struct sctphdr *sctph; unsigned int sctphoff = iph->len; bool payload_csum = false; @@ -135,7 +134,7 @@ static int sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { - sctp_sctphdr_t *sctph; + struct sctphdr *sctph; unsigned int sctphoff = iph->len; bool payload_csum = false; @@ -378,7 +377,7 @@ static inline void set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { - sctp_chunkhdr_t _sctpch, *sch; + struct sctp_chunkhdr _sctpch, *sch; unsigned char chunk_type; int event, next_state; int ihl, cofs; @@ -389,7 +388,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - cofs = ihl + sizeof(sctp_sctphdr_t); + cofs = ihl + sizeof(struct sctphdr); sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -410,7 +409,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, (sch->type == SCTP_CID_COOKIE_ACK)) { int clen = ntohs(sch->length); - if (clen >= sizeof(sctp_chunkhdr_t)) { + if (clen >= sizeof(_sctpch)) { sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), sizeof(_sctpch), &_sctpch); if (sch && sch->type == SCTP_CID_ABORT) diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 03d2ccffa9fa..20edd589fe06 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -197,8 +197,8 @@ static void __exit nf_conntrack_amanda_fini(void) { int i; - nf_conntrack_helper_unregister(&amanda_helper[0]); - nf_conntrack_helper_unregister(&amanda_helper[1]); + nf_conntrack_helpers_unregister(amanda_helper, + ARRAY_SIZE(amanda_helper)); for (i = 0; i < ARRAY_SIZE(search); i++) textsearch_destroy(search[i].ts); } @@ -218,16 +218,12 @@ static int __init nf_conntrack_amanda_init(void) goto err1; } } - ret = nf_conntrack_helper_register(&amanda_helper[0]); + ret = nf_conntrack_helpers_register(amanda_helper, + ARRAY_SIZE(amanda_helper)); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&amanda_helper[1]); - if (ret < 0) - goto err2; return 0; -err2: - nf_conntrack_helper_unregister(&amanda_helper[0]); err1: while (--i >= 0) textsearch_destroy(search[i].ts); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e847dbaa0c6b..9979f46c81dc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1586,13 +1586,12 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct hlist_nulls_node *n; - int cpu; spinlock_t *lockp; for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { @@ -1604,8 +1603,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); - if (net_eq(nf_ct_net(ct), net) && - iter(ct, data)) + if (iter(ct, data)) goto found; } } @@ -1614,51 +1612,150 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), cond_resched(); } + return NULL; +found: + atomic_inc(&ct->ct_general.use); + spin_unlock(lockp); + local_bh_enable(); + return ct; +} + +static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report) +{ + unsigned int bucket = 0, sequence; + struct nf_conn *ct; + + might_sleep(); + + for (;;) { + sequence = read_seqcount_begin(&nf_conntrack_generation); + + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + /* Time to push up daises... */ + + nf_ct_delete(ct, portid, report); + nf_ct_put(ct); + cond_resched(); + } + + if (!read_seqcount_retry(&nf_conntrack_generation, sequence)) + break; + bucket = 0; + } +} + +struct iter_data { + int (*iter)(struct nf_conn *i, void *data); + void *data; + struct net *net; +}; + +static int iter_net_only(struct nf_conn *i, void *data) +{ + struct iter_data *d = data; + + if (!net_eq(d->net, nf_ct_net(i))) + return 0; + + return d->iter(i, d->data); +} + +static void +__nf_ct_unconfirmed_destroy(struct net *net) +{ + int cpu; + for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct ct_pcpu *pcpu; + + pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); spin_lock_bh(&pcpu->lock); hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { + struct nf_conn *ct; + ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - set_bit(IPS_DYING_BIT, &ct->status); + + /* we cannot call iter() on unconfirmed list, the + * owning cpu can reallocate ct->ext at any time. + */ + set_bit(IPS_DYING_BIT, &ct->status); } spin_unlock_bh(&pcpu->lock); cond_resched(); } - return NULL; -found: - atomic_inc(&ct->ct_general.use); - spin_unlock(lockp); - local_bh_enable(); - return ct; } -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report) +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report) { - struct nf_conn *ct; - unsigned int bucket = 0; + struct iter_data d; might_sleep(); if (atomic_read(&net->ct.count) == 0) return; - while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { - /* Time to push up daises... */ + __nf_ct_unconfirmed_destroy(net); - nf_ct_delete(ct, portid, report); - nf_ct_put(ct); - cond_resched(); + d.iter = iter; + d.data = data; + d.net = net; + + synchronize_net(); + + nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); +} +EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); + +/** + * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table + * @iter: callback to invoke for each conntrack + * @data: data to pass to @iter + * + * Like nf_ct_iterate_cleanup, but first marks conntracks on the + * unconfirmed list as dying (so they will not be inserted into + * main table). + * + * Can only be called in module exit path. + */ +void +nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) +{ + struct net *net; + + rtnl_lock(); + for_each_net(net) { + if (atomic_read(&net->ct.count) == 0) + continue; + __nf_ct_unconfirmed_destroy(net); } + rtnl_unlock(); + + /* Need to wait for netns cleanup worker to finish, if its + * running -- it might have deleted a net namespace from + * the global list, so our __nf_ct_unconfirmed_destroy() might + * not have affected all namespaces. + */ + net_ns_barrier(); + + /* a conntrack could have been unlinked from unconfirmed list + * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy(). + * This makes sure its inserted into conntrack table. + */ + synchronize_net(); + + nf_ct_iterate_cleanup(iter, data, 0, 0); } -EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); +EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy); static int kill_all(struct nf_conn *i, void *data) { - return 1; + return net_eq(nf_ct_net(i), data); } void nf_ct_free_hashtable(void *hash, unsigned int size) @@ -1723,7 +1820,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { - nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0); + nf_ct_iterate_cleanup(kill_all, net, 0, 0); if (atomic_read(&net->ct.count) != 0) busy = 1; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3bcdc718484e..f71f0d2558fd 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1815,14 +1815,44 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { }, }; +static int __init h323_helper_init(void) +{ + int ret; + + ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); + if (ret < 0) + return ret; + ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931, + ARRAY_SIZE(nf_conntrack_helper_q931)); + if (ret < 0) + goto err1; + ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras, + ARRAY_SIZE(nf_conntrack_helper_ras)); + if (ret < 0) + goto err2; + + return 0; +err2: + nf_conntrack_helpers_unregister(nf_conntrack_helper_q931, + ARRAY_SIZE(nf_conntrack_helper_q931)); +err1: + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); + return ret; +} + +static void __exit h323_helper_exit(void) +{ + nf_conntrack_helpers_unregister(nf_conntrack_helper_ras, + ARRAY_SIZE(nf_conntrack_helper_ras)); + nf_conntrack_helpers_unregister(nf_conntrack_helper_q931, + ARRAY_SIZE(nf_conntrack_helper_q931)); + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); +} + /****************************************************************************/ static void __exit nf_conntrack_h323_fini(void) { - nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[1]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); - nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); + h323_helper_exit(); kfree(h323_buffer); pr_debug("nf_ct_h323: fini\n"); } @@ -1837,32 +1867,11 @@ static int __init nf_conntrack_h323_init(void) h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); + ret = h323_helper_init(); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); - if (ret < 0) - goto err2; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); - if (ret < 0) - goto err3; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); - if (ret < 0) - goto err4; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); - if (ret < 0) - goto err5; pr_debug("nf_ct_h323: init success\n"); return 0; - -err5: - nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); -err4: - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); -err3: - nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); -err2: - nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); err1: kfree(h323_buffer); return ret; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 7f6100ca63be..9129bb3b5153 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -285,16 +285,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); /* appropriate ct lock protecting must be taken by caller */ -static inline int unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) +static int unhelp(struct nf_conn *ct, void *me) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } + + /* We are not intended to delete this conntrack. */ return 0; } @@ -437,33 +437,10 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); -static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, - struct net *net) -{ - struct nf_conntrack_tuple_hash *h; - const struct hlist_nulls_node *nn; - int cpu; - - /* Get rid of expecteds, set helpers to NULL. */ - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - unhelp(h, me); - spin_unlock_bh(&pcpu->lock); - } -} - void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { - struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *next; - const struct hlist_nulls_node *nn; - unsigned int last_hsize; - spinlock_t *lock; - struct net *net; unsigned int i; mutex_lock(&nf_ct_helper_mutex); @@ -491,26 +468,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } spin_unlock_bh(&nf_conntrack_expect_lock); - rtnl_lock(); - for_each_net(net) - __nf_conntrack_helper_unregister(me, net); - rtnl_unlock(); - - local_bh_disable(); -restart: - last_hsize = nf_conntrack_htable_size; - for (i = 0; i < last_hsize; i++) { - lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; - nf_conntrack_lock(lock); - if (last_hsize != nf_conntrack_htable_size) { - spin_unlock(lock); - goto restart; - } - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - unhelp(h, me); - spin_unlock(lock); - } - local_bh_enable(); + nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a8be9b72e6cd..7999e70c3bfb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -636,11 +636,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) if (events & (1 << IPCT_DESTROY)) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; - } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) { + } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events) { + } else if (events) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -1122,8 +1122,8 @@ static int ctnetlink_flush_conntrack(struct net *net, return PTR_ERR(filter); } - nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, - portid, report); + nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, + portid, report); kfree(filter); return 0; @@ -1132,7 +1132,8 @@ static int ctnetlink_flush_conntrack(struct net *net, static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -1184,7 +1185,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -1345,7 +1347,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1367,7 +1370,8 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1906,7 +1910,8 @@ err1: static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2071,7 +2076,8 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2116,7 +2122,8 @@ nlmsg_failure: static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; int err; @@ -2778,7 +2785,8 @@ out: static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { int err; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2822,7 +2830,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -2834,7 +2843,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (nlh->nlmsg_flags & NLM_F_DUMP) { if (cda[CTA_EXPECT_MASTER]) - return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda); + return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda, + extack); else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, @@ -2902,7 +2912,8 @@ out: static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -3190,7 +3201,8 @@ err_ct: static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -3296,7 +3308,8 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 2de6c1fe3261..1dcad229c3cc 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> -static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); @@ -68,7 +68,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, struct nf_conntrack_l4proto * __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) { - if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL)) return &nf_conntrack_l4proto_generic; return rcu_dereference(nf_ct_protos[l3proto][l4proto]); @@ -212,7 +212,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) int ret = 0; struct nf_conntrack_l3proto *old; - if (proto->l3proto >= AF_MAX) + if (proto->l3proto >= NFPROTO_NUMPROTO) return -EBUSY; if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size) @@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { - BUG_ON(proto->l3proto >= AF_MAX); + BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); mutex_lock(&nf_ct_proto_mutex); BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], @@ -265,6 +265,8 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_destroy(kill_l3proto, proto); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); @@ -280,9 +282,6 @@ void nf_ct_l3proto_pernet_unregister(struct net *net, */ if (proto->net_ns_put) proto->net_ns_put(net); - - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); @@ -342,7 +341,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) { int ret = 0; - if (l4proto->l3proto >= PF_MAX) + if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)) return -EBUSY; if ((l4proto->to_nlattr && !l4proto->nlattr_size) || @@ -421,17 +420,23 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) + { - BUG_ON(l4proto->l3proto >= PF_MAX); + BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)); - mutex_lock(&nf_ct_proto_mutex); BUG_ON(rcu_dereference_protected( nf_ct_protos[l4proto->l3proto][l4proto->l4proto], lockdep_is_held(&nf_ct_proto_mutex) ) != l4proto); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], &nf_conntrack_l4proto_generic); +} + +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +{ + mutex_lock(&nf_ct_proto_mutex); + __nf_ct_l4proto_unregister_one(l4proto); mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); @@ -448,9 +453,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, pn->users--; nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); - - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); @@ -500,8 +502,14 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], unsigned int num_proto) { + mutex_lock(&nf_ct_proto_mutex); while (num_proto-- != 0) - nf_ct_l4proto_unregister_one(l4proto[num_proto]); + __nf_ct_l4proto_unregister_one(l4proto[num_proto]); + mutex_unlock(&nf_ct_proto_mutex); + + synchronize_net(); + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_destroy(kill_l4proto, l4proto); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); @@ -548,7 +556,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net) int nf_conntrack_proto_init(void) { unsigned int i; - for (i = 0; i < AF_MAX; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) rcu_assign_pointer(nf_ct_l3protos[i], &nf_conntrack_l3proto_generic); return 0; @@ -558,6 +566,6 @@ void nf_conntrack_proto_fini(void) { unsigned int i; /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) + for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) kfree(nf_ct_protos[i]); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1c5b14a6cab3..31c6c8ee9d5d 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -190,7 +190,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) } #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ -for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0; \ +for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ (offset) < (skb)->len && \ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++) @@ -202,7 +202,7 @@ static int do_basic_checks(struct nf_conn *ct, unsigned long *map) { u_int32_t offset, count; - sctp_chunkhdr_t _sch, *sch; + struct sctp_chunkhdr _sch, *sch; int flag; flag = 0; @@ -395,9 +395,9 @@ static int sctp_packet(struct nf_conn *ct, /* If it is an INIT or an INIT ACK note down the vtag */ if (sch->type == SCTP_CID_INIT || sch->type == SCTP_CID_INIT_ACK) { - sctp_inithdr_t _inithdr, *ih; + struct sctp_inithdr _inithdr, *ih; - ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(_inithdr), &_inithdr); if (ih == NULL) goto out_unlock; @@ -471,23 +471,20 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Copy the vtag into the state info */ if (sch->type == SCTP_CID_INIT) { - if (sh->vtag == 0) { - sctp_inithdr_t _inithdr, *ih; + struct sctp_inithdr _inithdr, *ih; + /* Sec 8.5.1 (A) */ + if (sh->vtag) + return false; - ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) - return false; + ih = skb_header_pointer(skb, offset + sizeof(_sch), + sizeof(_inithdr), &_inithdr); + if (!ih) + return false; - pr_debug("Setting vtag %x for new conn\n", - ih->init_tag); + pr_debug("Setting vtag %x for new conn\n", + ih->init_tag); - ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = - ih->init_tag; - } else { - /* Sec 8.5.1 (A) */ - return false; - } + ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; } else if (sch->type == SCTP_CID_HEARTBEAT) { pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index c9d7f95768ab..f4a566e67213 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -13,6 +13,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_dup_netdev.h> static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6c72922d20ca..832c5a08d9a5 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -582,12 +582,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) .l3proto = l3proto, .l4proto = l4proto, }; - struct net *net; - rtnl_lock(); - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); - rtnl_unlock(); + nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); } static void nf_nat_l3proto_clean(u8 l3proto) @@ -595,13 +591,8 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct nf_nat_proto_clean clean = { .l3proto = l3proto, }; - struct net *net; - rtnl_lock(); - - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); - rtnl_unlock(); + nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); } /* Protocol registration. */ @@ -822,17 +813,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, } #endif -static void __net_exit nf_nat_net_exit(struct net *net) -{ - struct nf_nat_proto_clean clean = {}; - - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); -} - -static struct pernet_operations nf_nat_net_ops = { - .exit = nf_nat_net_exit, -}; - static struct nf_ct_helper_expectfn follow_master_nat = { .name = "nat-follow-master", .expectfn = nf_nat_follow_master, @@ -853,10 +833,6 @@ static int __init nf_nat_init(void) return ret; } - ret = register_pernet_subsys(&nf_nat_net_ops); - if (ret < 0) - goto cleanup_extend; - nf_ct_helper_expectfn_register(&follow_master_nat); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); @@ -867,18 +843,15 @@ static int __init nf_nat_init(void) RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); #endif return 0; - - cleanup_extend: - rhltable_destroy(&nf_nat_bysource_table); - nf_ct_extend_unregister(&nat_extend); - return ret; } static void __exit nf_nat_cleanup(void) { + struct nf_nat_proto_clean clean = {}; unsigned int i; - unregister_pernet_subsys(&nf_nat_net_ops); + nf_ct_iterate_destroy(nf_nat_proto_clean, &clean); + nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 804e8a0ab36e..c57ee3240b1d 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -32,7 +32,7 @@ sctp_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - sctp_sctphdr_t *hdr; + struct sctphdr *hdr; int hdrsize = 8; /* This could be an inner header returned in imcp packet; in such diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index da314be0c048..7843efa33c59 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/vmalloc.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> @@ -386,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } -static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; +static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; static const struct nf_chain_type * __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) @@ -534,7 +535,8 @@ done: static int nf_tables_gettable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -677,7 +679,8 @@ err: static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -830,7 +833,8 @@ out: static int nf_tables_deltable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -869,6 +873,9 @@ int nft_register_chain_type(const struct nf_chain_type *ctype) { int err = 0; + if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO)) + return -EINVAL; + nfnl_lock(NFNL_SUBSYS_NFTABLES); if (chain_type[ctype->family][ctype->type] != NULL) { err = -EBUSY; @@ -1123,7 +1130,8 @@ done: static int nf_tables_getchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -1319,7 +1327,8 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nlattr * uninitialized_var(name); @@ -1557,7 +1566,8 @@ err1: static int nf_tables_delchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -2038,7 +2048,8 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) static int nf_tables_getrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -2131,7 +2142,8 @@ static struct nft_expr_info *info; static int nf_tables_newrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -2313,7 +2325,8 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, static int nf_tables_delrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -2377,64 +2390,77 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, * Sets */ -static LIST_HEAD(nf_tables_set_ops); +static LIST_HEAD(nf_tables_set_types); -int nft_register_set(struct nft_set_ops *ops) +int nft_register_set(struct nft_set_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&ops->list, &nf_tables_set_ops); + list_add_tail_rcu(&type->list, &nf_tables_set_types); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_set); -void nft_unregister_set(struct nft_set_ops *ops) +void nft_unregister_set(struct nft_set_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del_rcu(&ops->list); + list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_set); +#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ + NFT_SET_TIMEOUT | NFT_SET_OBJECT) + +static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags) +{ + return (flags & ops->features) == (flags & NFT_SET_FEATURES); +} + /* * Select a set implementation based on the data characteristics and the * given policy. The total memory use might not be known if no size is * given, in that case the amount of memory per element is used. */ static const struct nft_set_ops * -nft_select_set_ops(const struct nlattr * const nla[], +nft_select_set_ops(const struct nft_ctx *ctx, + const struct nlattr * const nla[], const struct nft_set_desc *desc, enum nft_set_policies policy) { const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; - u32 features; + const struct nft_set_type *type; + u32 flags = 0; #ifdef CONFIG_MODULES - if (list_empty(&nf_tables_set_ops)) { + if (list_empty(&nf_tables_set_types)) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-set"); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (!list_empty(&nf_tables_set_ops)) + if (!list_empty(&nf_tables_set_types)) return ERR_PTR(-EAGAIN); } #endif - features = 0; - if (nla[NFTA_SET_FLAGS] != NULL) { - features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT | - NFT_SET_OBJECT; - } + if (nla[NFTA_SET_FLAGS] != NULL) + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); bops = NULL; best.size = ~0; best.lookup = ~0; best.space = ~0; - list_for_each_entry(ops, &nf_tables_set_ops, list) { - if ((ops->features & features) != features) + list_for_each_entry(type, &nf_tables_set_types, list) { + if (!type->select_ops) + ops = type->ops; + else + ops = type->select_ops(ctx, desc, flags); + if (!ops) + continue; + + if (!nft_set_ops_candidate(ops, flags)) continue; - if (!ops->estimate(desc, features, &est)) + if (!ops->estimate(desc, flags, &est)) continue; switch (policy) { @@ -2465,10 +2491,10 @@ nft_select_set_ops(const struct nlattr * const nla[], break; } - if (!try_module_get(ops->owner)) + if (!try_module_get(type->owner)) continue; if (bops != NULL) - module_put(bops->owner); + module_put(bops->type->owner); bops = ops; best = est; @@ -2816,7 +2842,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) static int nf_tables_getset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_cur(net); const struct nft_set *set; @@ -2892,7 +2919,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, static int nf_tables_newset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -3029,7 +3057,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(nla, &desc, policy); + ops = nft_select_set_ops(&ctx, nla, &desc, policy); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -3039,12 +3067,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, size = 0; if (ops->privsize != NULL) - size = ops->privsize(nla); + size = ops->privsize(nla, &desc); - err = -ENOMEM; - set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); - if (set == NULL) + set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); + if (!set) { + err = -ENOMEM; goto err1; + } nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); err = nf_tables_set_alloc_name(&ctx, set, name); @@ -3087,17 +3116,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err3: ops->destroy(set); err2: - kfree(set); + kvfree(set); err1: - module_put(ops->owner); + module_put(ops->type->owner); return err; } static void nft_set_destroy(struct nft_set *set) { set->ops->destroy(set); - module_put(set->ops->owner); - kfree(set); + module_put(set->ops->type->owner); + kvfree(set); } static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) @@ -3109,7 +3138,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set static int nf_tables_delset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -3469,7 +3499,8 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb) static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_cur(net); const struct nft_set *set; @@ -3870,7 +3901,8 @@ err1: static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_next(net); const struct nlattr *attr; @@ -4067,7 +4099,8 @@ err1: static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { u8 genmask = nft_genmask_next(net); const struct nlattr *attr; @@ -4277,7 +4310,8 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype) static int nf_tables_newobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_object_type *type; @@ -4471,7 +4505,8 @@ nft_obj_filter_alloc(const struct nlattr * const nla[]) static int nf_tables_getobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); @@ -4549,8 +4584,9 @@ static void nft_obj_destroy(struct nft_object *obj) } static int nf_tables_delobj(struct net *net, struct sock *nlsk, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); @@ -4680,7 +4716,8 @@ err: static int nf_tables_getgen(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) { struct sk_buff *skb2; int err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 80f5ecf2c3d7..92b05e188fd1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -201,7 +201,8 @@ replay: if (nc->call_rcu) { err = nc->call_rcu(net, net->nfnl, skb, nlh, - (const struct nlattr **)cda); + (const struct nlattr **)cda, + extack); rcu_read_unlock(); } else { rcu_read_unlock(); @@ -211,7 +212,8 @@ replay: err = -EAGAIN; else if (nc->call) err = nc->call(net, net->nfnl, skb, nlh, - (const struct nlattr **)cda); + (const struct nlattr **)cda, + extack); else err = -EINVAL; nfnl_unlock(subsys_id); @@ -226,9 +228,11 @@ struct nfnl_err { struct list_head head; struct nlmsghdr *nlh; int err; + struct netlink_ext_ack extack; }; -static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) +static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack) { struct nfnl_err *nfnl_err; @@ -238,6 +242,7 @@ static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) nfnl_err->nlh = nlh; nfnl_err->err = err; + nfnl_err->extack = *extack; list_add_tail(&nfnl_err->head, list); return 0; @@ -262,7 +267,8 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) { - netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL); + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, + &nfnl_err->extack); nfnl_err_del(nfnl_err); } } @@ -280,6 +286,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; + struct netlink_ext_ack extack; LIST_HEAD(err_list); u32 status; int err; @@ -325,6 +332,7 @@ replay: while (skb->len >= nlmsg_total_size(0)) { int msglen, type; + memset(&extack, 0, sizeof(extack)); nlh = nlmsg_hdr(skb); err = 0; @@ -384,7 +392,8 @@ replay: if (nc->call_batch) { err = nc->call_batch(net, net->nfnl, skb, nlh, - (const struct nlattr **)cda); + (const struct nlattr **)cda, + &extack); } /* The lock was released to autoload some module, we @@ -402,7 +411,7 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err) < 0) { + if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 9898fb4d0512..c45e6d4358ab 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,7 +49,8 @@ struct nfacct_filter { static int nfnl_acct_new(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { struct nf_acct *nfacct, *matching = NULL; char *acct_name; @@ -264,7 +265,8 @@ nfacct_filter_alloc(const struct nlattr * const attr) static int nfnl_acct_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { int ret = -ENOENT; struct nf_acct *cur; @@ -343,7 +345,8 @@ static int nfnl_acct_try_del(struct nf_acct *cur) static int nfnl_acct_del(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { struct nf_acct *cur, *tmp; int ret = -ENOENT; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index be678a323598..41628b393673 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -398,7 +398,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[], static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; @@ -599,7 +600,8 @@ out: static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { int ret = -ENOENT; struct nf_conntrack_helper *cur; @@ -666,7 +668,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { char *helper_name = NULL; struct nf_conntrack_helper *cur; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index a3e7bb54d96a..400e9ae97153 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -69,7 +69,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; @@ -239,7 +240,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { int ret = -ENOENT; char *name; @@ -287,49 +289,20 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, return ret; } -static void untimeout(struct nf_conntrack_tuple_hash *i, - struct ctnl_timeout *timeout) +static int untimeout(struct nf_conn *ct, void *timeout) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) RCU_INIT_POINTER(timeout_ext->timeout, NULL); + + /* We are not intended to delete this conntrack. */ + return 0; } static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { - struct nf_conntrack_tuple_hash *h; - const struct hlist_nulls_node *nn; - unsigned int last_hsize; - spinlock_t *lock; - int i, cpu; - - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); - - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - untimeout(h, timeout); - spin_unlock_bh(&pcpu->lock); - } - - local_bh_disable(); -restart: - last_hsize = nf_conntrack_htable_size; - for (i = 0; i < last_hsize; i++) { - lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; - nf_conntrack_lock(lock); - if (last_hsize != nf_conntrack_htable_size) { - spin_unlock(lock); - goto restart; - } - - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - untimeout(h, timeout); - spin_unlock(lock); - } - local_bh_enable(); + nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0); } /* try to delete object, fail if it is still in use. */ @@ -355,7 +328,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { struct ctnl_timeout *cur, *tmp; int ret = -ENOENT; @@ -386,7 +360,8 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, static int cttimeout_default_set(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; @@ -475,7 +450,8 @@ nla_put_failure: static int cttimeout_default_get(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) + const struct nlattr * const cda[], + struct netlink_ext_ack *extack) { __u16 l3num; __u8 l4num; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 94ec0d0765a8..c684ba95dbb4 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -795,7 +795,8 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { return -ENOTSUPP; } @@ -818,7 +819,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { static int nfulnl_recv_config(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfula[]) + const struct nlattr * const nfula[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1b17a1b445a3..16fa04086880 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1032,7 +1032,8 @@ static int nfq_id_after(unsigned int id, unsigned int max) static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_queue_entry *entry, *tmp; @@ -1136,7 +1137,8 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -1200,7 +1202,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { return -ENOTSUPP; } @@ -1217,7 +1220,8 @@ static const struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) + const struct nlattr * const nfqa[], + struct netlink_ext_ack *extack) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f753ec69f790..f5a7cb68694e 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -530,7 +530,8 @@ nla_put_failure: static int nfnl_compat_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const tb[]) + const struct nlattr * const tb[], + struct netlink_ext_ack *extack) { int ret = 0, target; struct nfgenmsg *nfmsg; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index d3eb640bc784..c7383d8f88d0 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -23,9 +23,9 @@ struct nft_rt { enum nft_registers dreg:8; }; -void nft_rt_get_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_rt *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -72,9 +72,9 @@ const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { [NFTA_RT_KEY] = { .type = NLA_U32 }, }; -int nft_rt_get_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_rt_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_rt *priv = nft_expr_priv(expr); unsigned int len; @@ -103,8 +103,8 @@ int nft_rt_get_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, len); } -int nft_rt_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) +static int nft_rt_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_rt *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b988162b5b15..734989c40579 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -236,7 +236,8 @@ static inline u32 nft_bitmap_total_size(u32 klen) return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); } -static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[]) +static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); @@ -278,7 +279,9 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_bitmap_type; static struct nft_set_ops nft_bitmap_ops __read_mostly = { + .type = &nft_bitmap_type, .privsize = nft_bitmap_privsize, .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, @@ -291,17 +294,21 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = { .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, +}; + +static struct nft_set_type nft_bitmap_type __read_mostly = { + .ops = &nft_bitmap_ops, .owner = THIS_MODULE, }; static int __init nft_bitmap_module_init(void) { - return nft_register_set(&nft_bitmap_ops); + return nft_register_set(&nft_bitmap_type); } static void __exit nft_bitmap_module_exit(void) { - nft_unregister_set(&nft_bitmap_ops); + nft_unregister_set(&nft_bitmap_type); } module_init(nft_bitmap_module_init); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3d3a6df4ce70..0fa01d772c5e 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -22,45 +22,43 @@ #include <net/netfilter/nf_tables.h> /* We target a hash table size of 4, element hint is 75% of final size */ -#define NFT_HASH_ELEMENT_HINT 3 +#define NFT_RHASH_ELEMENT_HINT 3 -struct nft_hash { +struct nft_rhash { struct rhashtable ht; struct delayed_work gc_work; }; -struct nft_hash_elem { +struct nft_rhash_elem { struct rhash_head node; struct nft_set_ext ext; }; -struct nft_hash_cmp_arg { +struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; }; -static const struct rhashtable_params nft_hash_params; - -static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) { - const struct nft_hash_cmp_arg *arg = data; + const struct nft_rhash_cmp_arg *arg = data; return jhash(arg->key, len, seed); } -static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) +static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed) { - const struct nft_hash_elem *he = data; + const struct nft_rhash_elem *he = data; return jhash(nft_set_ext_key(&he->ext), len, seed); } -static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, - const void *ptr) +static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) { - const struct nft_hash_cmp_arg *x = arg->key; - const struct nft_hash_elem *he = ptr; + const struct nft_rhash_cmp_arg *x = arg->key; + const struct nft_rhash_elem *he = ptr; if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; @@ -71,41 +69,49 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, return 0; } -static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +static const struct rhashtable_params nft_rhash_params = { + .head_offset = offsetof(struct nft_rhash_elem, node), + .hashfn = nft_rhash_key, + .obj_hashfn = nft_rhash_obj, + .obj_cmpfn = nft_rhash_cmp, + .automatic_shrinking = true, +}; + +static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { - struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + const struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_cur(net), .set = set, .key = key, }; - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); if (he != NULL) *ext = &he->ext; return !!he; } -static bool nft_hash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, - const struct nft_expr *, - struct nft_regs *regs), - const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_set_ext **ext) +static bool nft_rhash_update(struct nft_set *set, const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *regs), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he, *prev; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he, *prev; + struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, .key = key, }; - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); if (he != NULL) goto out; @@ -114,7 +120,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, goto err1; prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); + nft_rhash_params); if (IS_ERR(prev)) goto err2; @@ -134,21 +140,21 @@ err1: return false; } -static int nft_hash_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, - struct nft_set_ext **ext) +static int nft_rhash_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he = elem->priv; + struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; - struct nft_hash_elem *prev; + struct nft_rhash_elem *prev; prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); + nft_rhash_params); if (IS_ERR(prev)) return PTR_ERR(prev); if (prev) { @@ -158,19 +164,19 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, return 0; } -static void nft_hash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_rhash_activate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash_elem *he = elem->priv; + struct nft_rhash_elem *he = elem->priv; nft_set_elem_change_active(net, set, &he->ext); nft_set_elem_clear_busy(&he->ext); } -static bool nft_hash_flush(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_rhash_flush(const struct net *net, + const struct nft_set *set, void *priv) { - struct nft_hash_elem *he = priv; + struct nft_rhash_elem *he = priv; if (!nft_set_elem_mark_busy(&he->ext) || !nft_is_active(net, &he->ext)) { @@ -180,22 +186,22 @@ static bool nft_hash_flush(const struct net *net, return false; } -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void *nft_rhash_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; rcu_read_lock(); - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); if (he != NULL && - !nft_hash_flush(net, set, he)) + !nft_rhash_flush(net, set, he)) he = NULL; rcu_read_unlock(); @@ -203,21 +209,21 @@ static void *nft_hash_deactivate(const struct net *net, return he; } -static void nft_hash_remove(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_rhash_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he = elem->priv; - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } -static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_iter *iter) +static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) { - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; int err; @@ -266,16 +272,16 @@ out: rhashtable_walk_exit(&hti); } -static void nft_hash_gc(struct work_struct *work) +static void nft_rhash_gc(struct work_struct *work) { struct nft_set *set; - struct nft_hash_elem *he; - struct nft_hash *priv; + struct nft_rhash_elem *he; + struct nft_rhash *priv; struct nft_set_gc_batch *gcb = NULL; struct rhashtable_iter hti; int err; - priv = container_of(work, struct nft_hash, gc_work.work); + priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -301,7 +307,7 @@ static void nft_hash_gc(struct work_struct *work) gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (gcb == NULL) goto out; - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, he); } @@ -315,82 +321,290 @@ schedule: nft_set_gc_interval(set)); } -static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { - return sizeof(struct nft_hash); + return sizeof(struct nft_rhash); } -static const struct rhashtable_params nft_hash_params = { - .head_offset = offsetof(struct nft_hash_elem, node), - .hashfn = nft_hash_key, - .obj_hashfn = nft_hash_obj, - .obj_cmpfn = nft_hash_cmp, - .automatic_shrinking = true, -}; - -static int nft_hash_init(const struct nft_set *set, - const struct nft_set_desc *desc, - const struct nlattr * const tb[]) +static int nft_rhash_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_set_priv(set); - struct rhashtable_params params = nft_hash_params; + struct nft_rhash *priv = nft_set_priv(set); + struct rhashtable_params params = nft_rhash_params; int err; - params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; + params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; params.key_len = set->klen; err = rhashtable_init(&priv->ht, ¶ms); if (err < 0) return err; - INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); if (set->flags & NFT_SET_TIMEOUT) queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); return 0; } -static void nft_hash_elem_destroy(void *ptr, void *arg) +static void nft_rhash_elem_destroy(void *ptr, void *arg) { nft_set_elem_destroy(arg, ptr, true); } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_set *set) { - struct nft_hash *priv = nft_set_priv(set); + struct nft_rhash *priv = nft_set_priv(set); cancel_delayed_work_sync(&priv->gc_work); - rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)set); } -static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, - struct nft_set_estimate *est) +static u32 nft_hash_buckets(u32 size) { - unsigned int esize; + return roundup_pow_of_two(size * 4 / 3); +} - esize = sizeof(struct nft_hash_elem); - if (desc->size) { - est->size = sizeof(struct nft_hash) + - roundup_pow_of_two(desc->size * 4 / 3) * - sizeof(struct nft_hash_elem *) + - desc->size * esize; - } else { - /* Resizing happens when the load drops below 30% or goes - * above 75%. The average of 52.5% load (approximated by 50%) - * is used for the size estimation of the hash buckets, - * meaning we calculate two buckets per element. - */ - est->size = esize + 2 * sizeof(struct nft_hash_elem *); +static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + est->size = ~0; + est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_N; + + return true; +} + +struct nft_hash { + u32 seed; + u32 buckets; + struct hlist_head table[]; +}; + +struct nft_hash_elem { + struct hlist_node node; + struct nft_set_ext ext; +}; + +static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + const struct nft_hash_elem *he; + u32 hash; + + hash = jhash(key, set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) && + nft_set_elem_active(&he->ext, genmask)) { + *ext = &he->ext; + return true; + } + } + return false; +} + +/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */ +static inline u32 nft_hash_key(const u32 *key, u32 klen) +{ + if (klen == 4) + return *key; + + return *(u16 *)key; +} + +static bool nft_hash_lookup_fast(const struct net *net, + const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + const struct nft_hash_elem *he; + u32 hash, k1, k2; + + k1 = nft_hash_key(key, set->klen); + hash = jhash_1word(k1, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen); + if (k1 == k2 && + nft_set_elem_active(&he->ext, genmask)) { + *ext = &he->ext; + return true; + } + } + return false; +} + +static int nft_hash_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext) +{ + struct nft_hash_elem *this = elem->priv, *he; + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + u32 hash; + + hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&this->ext), + nft_set_ext_key(&he->ext), set->klen) && + nft_set_elem_active(&he->ext, genmask)) { + *ext = &he->ext; + return -EEXIST; + } + } + hlist_add_head_rcu(&this->node, &priv->table[hash]); + return 0; +} + +static void nft_hash_activate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->priv; + + nft_set_elem_change_active(net, set, &he->ext); +} + +static bool nft_hash_flush(const struct net *net, + const struct nft_set *set, void *priv) +{ + struct nft_hash_elem *he = priv; + + nft_set_elem_change_active(net, set, &he->ext); + return true; +} + +static void *nft_hash_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *this = elem->priv, *he; + u8 genmask = nft_genmask_next(net); + u32 hash; + + hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, + set->klen) || + nft_set_elem_active(&he->ext, genmask)) { + nft_set_elem_change_active(net, set, &he->ext); + return he; + } } + return NULL; +} + +static void nft_hash_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->priv; + + hlist_del_rcu(&he->node); +} + +static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_set_elem elem; + int i; + + for (i = 0; i < priv->buckets; i++) { + hlist_for_each_entry_rcu(he, &priv->table[i], node) { + if (iter->count < iter->skip) + goto cont; + if (!nft_set_elem_active(&he->ext, iter->genmask)) + goto cont; + + elem.priv = he; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } + } +} + +static unsigned int nft_hash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) +{ + return sizeof(struct nft_hash) + + nft_hash_buckets(desc->size) * sizeof(struct hlist_head); +} +static int nft_hash_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_set_priv(set); + + priv->buckets = nft_hash_buckets(desc->size); + get_random_bytes(&priv->seed, sizeof(priv->seed)); + + return 0; +} + +static void nft_hash_destroy(const struct nft_set *set) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct hlist_node *next; + int i; + + for (i = 0; i < priv->buckets; i++) { + hlist_for_each_entry_safe(he, next, &priv->table[i], node) { + hlist_del_rcu(&he->node); + nft_set_elem_destroy(set, he, true); + } + } +} + +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + est->size = sizeof(struct nft_hash) + + nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + + desc->size * sizeof(struct nft_hash_elem); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N; return true; } +static struct nft_set_type nft_hash_type; +static struct nft_set_ops nft_rhash_ops __read_mostly = { + .type = &nft_hash_type, + .privsize = nft_rhash_privsize, + .elemsize = offsetof(struct nft_rhash_elem, ext), + .estimate = nft_rhash_estimate, + .init = nft_rhash_init, + .destroy = nft_rhash_destroy, + .insert = nft_rhash_insert, + .activate = nft_rhash_activate, + .deactivate = nft_rhash_deactivate, + .flush = nft_rhash_flush, + .remove = nft_rhash_remove, + .lookup = nft_rhash_lookup, + .update = nft_rhash_update, + .walk = nft_rhash_walk, + .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, +}; + static struct nft_set_ops nft_hash_ops __read_mostly = { + .type = &nft_hash_type, .privsize = nft_hash_privsize, .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, @@ -402,20 +616,57 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup, - .update = nft_hash_update, .walk = nft_hash_walk, - .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, + .features = NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static struct nft_set_ops nft_hash_fast_ops __read_mostly = { + .type = &nft_hash_type, + .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), + .estimate = nft_hash_estimate, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, + .flush = nft_hash_flush, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup_fast, + .walk = nft_hash_walk, + .features = NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static const struct nft_set_ops * +nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, + u32 flags) +{ + if (desc->size) { + switch (desc->klen) { + case 2: + case 4: + return &nft_hash_fast_ops; + default: + return &nft_hash_ops; + } + } + + return &nft_rhash_ops; +} + +static struct nft_set_type nft_hash_type __read_mostly = { + .select_ops = nft_hash_select_ops, .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_set(&nft_hash_ops); + return nft_register_set(&nft_hash_type); } static void __exit nft_hash_module_exit(void) { - nft_unregister_set(&nft_hash_ops); + nft_unregister_set(&nft_hash_type); } module_init(nft_hash_module_init); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fbdbaa00dd5f..bce5382f1d49 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -251,7 +251,8 @@ cont: read_unlock_bh(&priv->lock); } -static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rbtree); } @@ -283,13 +284,11 @@ static void nft_rbtree_destroy(const struct nft_set *set) static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { - unsigned int nsize; - - nsize = sizeof(struct nft_rbtree_elem); if (desc->size) - est->size = sizeof(struct nft_rbtree) + desc->size * nsize; + est->size = sizeof(struct nft_rbtree) + + desc->size * sizeof(struct nft_rbtree_elem); else - est->size = nsize; + est->size = ~0; est->lookup = NFT_SET_CLASS_O_LOG_N; est->space = NFT_SET_CLASS_O_N; @@ -297,7 +296,9 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static struct nft_set_type nft_rbtree_type; static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .type = &nft_rbtree_type, .privsize = nft_rbtree_privsize, .elemsize = offsetof(struct nft_rbtree_elem, ext), .estimate = nft_rbtree_estimate, @@ -311,17 +312,21 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, +}; + +static struct nft_set_type nft_rbtree_type __read_mostly = { + .ops = &nft_rbtree_ops, .owner = THIS_MODULE, }; static int __init nft_rbtree_module_init(void) { - return nft_register_set(&nft_rbtree_ops); + return nft_register_set(&nft_rbtree_type); } static void __exit nft_rbtree_module_exit(void) { - nft_unregister_set(&nft_rbtree_ops); + nft_unregister_set(&nft_rbtree_type); } module_init(nft_rbtree_module_init); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index df7f1df00330..d767e35fff6b 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -127,7 +127,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, daddr, dport, in->ifindex); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in @@ -197,7 +197,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, daddr, ntohs(dport), in->ifindex); - if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index c05fefcec238..71cfa9551d08 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -63,7 +63,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { static int xt_osf_add_callback(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const osf_attrs[]) + const struct nlattr * const osf_attrs[], + struct netlink_ext_ack *extack) { struct xt_osf_user_finger *f; struct xt_osf_finger *kf = NULL, *sf; @@ -107,7 +108,8 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl, static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const osf_attrs[]) + const struct nlattr * const osf_attrs[], + struct netlink_ext_ack *extack) { struct xt_osf_user_finger *f; struct xt_osf_finger *sf; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 4dedb96d1a06..2d2fa1d53ea6 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -42,8 +42,8 @@ match_packet(const struct sk_buff *skb, bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; - const sctp_chunkhdr_t *sch; - sctp_chunkhdr_t _sch; + const struct sctp_chunkhdr *sch; + struct sctp_chunkhdr _sch; int chunk_match_type = info->chunk_match_type; const struct xt_sctp_flag_info *flag_info = info->flag_info; int flag_count = info->flag_count; @@ -118,8 +118,8 @@ static bool sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_sctp_info *info = par->matchinfo; - const sctp_sctphdr_t *sh; - sctp_sctphdr_t _sh; + const struct sctphdr *sh; + struct sctphdr _sh; if (par->fragoff != 0) { pr_debug("Dropping non-first fragment.. FIXME\n"); @@ -136,13 +136,13 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par) return SCCHECK(ntohs(sh->source) >= info->spts[0] && ntohs(sh->source) <= info->spts[1], - XT_SCTP_SRC_PORTS, info->flags, info->invflags) - && SCCHECK(ntohs(sh->dest) >= info->dpts[0] + XT_SCTP_SRC_PORTS, info->flags, info->invflags) && + SCCHECK(ntohs(sh->dest) >= info->dpts[0] && ntohs(sh->dest) <= info->dpts[1], - XT_SCTP_DEST_PORTS, info->flags, info->invflags) - && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t), - info, &par->hotdrop), - XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); + XT_SCTP_DEST_PORTS, info->flags, info->invflags) && + SCCHECK(match_packet(skb, par->thoff + sizeof(_sh), + info, &par->hotdrop), + XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } static int sctp_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a88745e4b7df..5acee49db90b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(nlk_sk(sk)->groups); } @@ -575,7 +575,7 @@ static void netlink_remove(struct sock *sk) table = &nl_table[sk->sk_protocol]; if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, netlink_rhashtable_params)) { - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } @@ -691,7 +691,7 @@ static void deferred_put_nlk_sk(struct rcu_head *head) struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); struct sock *sk = &nlk->sk; - if (!atomic_dec_and_test(&sk->sk_refcnt)) + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (nlk->cb_running && nlk->cb.done) { @@ -1848,7 +1848,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } if (dst_group) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); @@ -2226,7 +2226,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - atomic_inc(&skb->users); + refcount_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -2431,7 +2431,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, int exclude_portid = 0; if (report) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); exclude_portid = portid; } @@ -2568,7 +2568,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), nlk->cb_running, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), atomic_read(&s->sk_drops), sock_i_ino(s) ); diff --git a/net/nfc/core.c b/net/nfc/core.c index 122bb81da918..5cf33df888c3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -982,6 +982,8 @@ static void nfc_release(struct device *d) kfree(se); } + ida_simple_remove(&nfc_index_ida, dev->idx); + kfree(dev); } @@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; + int rc; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || !ops->deactivate_target || !ops->im_transceive) @@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, if (!dev) return NULL; + rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); + if (rc < 0) + goto err_free_dev; + dev->idx = rc; + + dev->dev.class = &nfc_class; + dev_set_name(&dev->dev, "nfc%d", dev->idx); + device_initialize(&dev->dev); + dev->ops = ops; dev->supported_protocols = supported_protocols; dev->tx_headroom = tx_headroom; @@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, } return dev; + +err_free_dev: + kfree(dev); + + return ERR_PTR(rc); } EXPORT_SYMBOL(nfc_allocate_device); @@ -1104,14 +1121,6 @@ int nfc_register_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); - if (dev->idx < 0) - return dev->idx; - - dev->dev.class = &nfc_class; - dev_set_name(&dev->dev, "nfc%d", dev->idx); - device_initialize(&dev->dev); - mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; rc = device_add(&dev->dev); @@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device); */ void nfc_unregister_device(struct nfc_dev *dev) { - int rc, id; + int rc; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - id = dev->idx; - if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); @@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev) nfc_devlist_generation++; device_del(&dev->dev); mutex_unlock(&nfc_devlist_mutex); - - ida_simple_remove(&nfc_index_ida, id); } EXPORT_SYMBOL(nfc_unregister_device); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index ebeace7a8278..de6dd37d04c7 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -240,7 +240,7 @@ int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type, { struct digital_cmd *cmd; - cmd = kzalloc(sizeof(struct digital_cmd), GFP_KERNEL); + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -287,7 +287,7 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; - params = kzalloc(sizeof(struct digital_tg_mdaa_params), GFP_KERNEL); + params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; @@ -706,11 +706,9 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, struct digital_data_exch *data_exch; int rc; - data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL); - if (!data_exch) { - pr_err("Failed to allocate data_exch struct\n"); + data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL); + if (!data_exch) return -ENOMEM; - } data_exch->cb = cb; data_exch->cb_context = cb_context; @@ -764,7 +762,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops, !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech)) return NULL; - ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL); + ddev = kzalloc(sizeof(*ddev), GFP_KERNEL); if (!ddev) return NULL; diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c index 74ccc2dd79d0..4f9a973988b2 100644 --- a/net/nfc/digital_dep.c +++ b/net/nfc/digital_dep.c @@ -151,7 +151,7 @@ static const u8 digital_payload_bits_map[4] = { * 0 <= wt <= 14 (given by the target by the TO field of ATR_RES response) */ #define DIGITAL_NFC_DEP_IN_MAX_WT 14 -#define DIGITAL_NFC_DEP_TG_MAX_WT 8 +#define DIGITAL_NFC_DEP_TG_MAX_WT 14 static const u16 digital_rwt_map[DIGITAL_NFC_DEP_IN_MAX_WT + 1] = { 100, 101, 101, 102, 105, 110, 119, 139, 177, 255, diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 3cc3448da524..2021d1d58a75 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -27,6 +27,7 @@ #define DIGITAL_SDD_RES_CT 0x88 #define DIGITAL_SDD_RES_LEN 5 +#define DIGITAL_SEL_RES_LEN 1 #define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04)) #define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60)) @@ -299,7 +300,7 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg, } } - if (!resp->len) { + if (resp->len != DIGITAL_SEL_RES_LEN) { rc = -EIO; goto exit; } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 2ffb18e73df6..fb7afcaa3004 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -77,7 +77,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); @@ -151,7 +152,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nfc_llcp llcp_addr; int len, ret = 0; - if (!addr || addr->sa_family != AF_NFC) + if (!addr || alen < offsetofend(struct sockaddr, sa_family) || + addr->sa_family != AF_NFC) return -EINVAL; pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); @@ -662,8 +664,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); - if (!addr || len < sizeof(struct sockaddr_nfc) || - addr->sa_family != AF_NFC) + if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC) return -EINVAL; if (addr->service_name_len == 0 && addr->dsap == 0) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index a3dac34cf790..c25e9b4179c3 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -73,11 +73,10 @@ int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, if (conn_info->dest_type == dest_type) { if (!params) return conn_info->conn_id; - if (conn_info) { - if (params->id == conn_info->dest_params->id && - params->protocol == conn_info->dest_params->protocol) - return conn_info->conn_id; - } + + if (params->id == conn_info->dest_params->id && + params->protocol == conn_info->dest_params->protocol) + return conn_info->conn_id; } } @@ -1173,8 +1172,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, return ndev; free_nfc: - kfree(ndev->nfc_dev); - + nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 6b0850e63e09..b251fb936a27 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -907,7 +907,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) u32 device_idx, target_idx, protocol; int rc; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX] || + !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d772e9a4b4f8..45fe8c8a884d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1090,6 +1090,58 @@ static struct sw_flow_actions *get_flow_actions(struct net *net, return acts; } +/* Factor out match-init and action-copy to avoid + * "Wframe-larger-than=1024" warning. Because mask is only + * used to get actions, we new a function to save some + * stack space. + * + * If there are not key and action attrs, we return 0 + * directly. In the case, the caller will also not use the + * match as before. If there is action attr, we try to get + * actions and save them to *acts. Before returning from + * the function, we reset the match->mask pointer. Because + * we should not to return match object with dangling reference + * to mask. + * */ +static int ovs_nla_init_match_and_action(struct net *net, + struct sw_flow_match *match, + struct sw_flow_key *key, + struct nlattr **a, + struct sw_flow_actions **acts, + bool log) +{ + struct sw_flow_mask mask; + int error = 0; + + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(match, key, true, &mask); + error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); + if (error) + goto error; + } + + if (a[OVS_FLOW_ATTR_ACTIONS]) { + if (!a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR(log, + "Flow key attribute not present in set flow."); + return -EINVAL; + } + + *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, + &mask, log); + if (IS_ERR(*acts)) { + error = PTR_ERR(*acts); + goto error; + } + } + + /* On success, error is 0. */ +error: + match->mask = NULL; + return error; +} + static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); @@ -1097,7 +1149,6 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sw_flow *flow; - struct sw_flow_mask mask; struct sk_buff *reply = NULL; struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; @@ -1109,34 +1160,18 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) bool ufid_present; ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); - if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, true, &mask); - error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], - a[OVS_FLOW_ATTR_MASK], log); - } else if (!ufid_present) { + if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) { OVS_NLERR(log, "Flow set message rejected, Key attribute missing."); - error = -EINVAL; + return -EINVAL; } + + error = ovs_nla_init_match_and_action(net, &match, &key, a, + &acts, log); if (error) goto error; - /* Validate actions. */ - if (a[OVS_FLOW_ATTR_ACTIONS]) { - if (!a[OVS_FLOW_ATTR_KEY]) { - OVS_NLERR(log, - "Flow key attribute not present in set flow."); - error = -EINVAL; - goto error; - } - - acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, - &mask, log); - if (IS_ERR(acts)) { - error = PTR_ERR(acts); - goto error; - } - + if (acts) { /* Can allocate before locking if have acts. */ reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, ufid_flags); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f9349a495caf..e3beb28203eb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1317,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_error_queue); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive packet socket: %p\n", sk); @@ -1739,7 +1739,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->flags = flags; INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); - atomic_set(&match->sk_ref, 0); + refcount_set(&match->sk_ref, 0); fanout_init_data(match); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; @@ -1753,10 +1753,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; - if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) { + if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); po->fanout = match; - atomic_inc(&match->sk_ref); + refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); __fanout_link(sk, po); err = 0; } @@ -1785,7 +1785,7 @@ static struct packet_fanout *fanout_release(struct sock *sk) if (f) { po->fanout = NULL; - if (atomic_dec_and_test(&f->sk_ref)) + if (refcount_dec_and_test(&f->sk_ref)) list_del(&f->list); else f = NULL; @@ -2523,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; - atomic_add(to_write, &po->sk.sk_wmem_alloc); + refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; @@ -4495,7 +4495,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), s->sk_type, ntohs(po->num), po->ifindex, diff --git a/net/packet/internal.h b/net/packet/internal.h index 9ee46314b7d7..94d1d405a116 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -1,6 +1,8 @@ #ifndef __PACKET_INTERNAL_H__ #define __PACKET_INTERNAL_H__ +#include <linux/refcount.h> + struct packet_mclist { struct packet_mclist *next; int ifindex; @@ -86,7 +88,7 @@ struct packet_fanout { struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; spinlock_t lock; - atomic_t sk_ref; + refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 64634e3ec2fc..1b050dd17393 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, return POLLHUP; if (sk->sk_state == TCP_ESTABLISHED && - atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && + refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && atomic_read(&pn->tx_credits)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; @@ -614,7 +614,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops)); } seq_pad(seq, '\n'); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 52d11d7725c8..0d8616aa5bad 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &cp->cp_send_w, 0); out: diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 58ae0db52ea1..a2ad4482376f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -53,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *); */ static inline int rxrpc_writable(struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; + return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; } /* @@ -730,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); @@ -747,7 +747,7 @@ static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); - _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt)); + _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* declare the socket closed for business */ sock_orphan(sk); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 67b02c45271b..b8985d01876a 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -27,7 +27,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); } /* @@ -38,7 +38,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) const void *here = __builtin_return_address(0); if (skb) { int n = atomic_read(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); } } @@ -49,7 +49,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); skb_get(skb); } @@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) int n; CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); kfree_skb(skb); } } @@ -78,7 +78,7 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) int n; CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); kfree_skb(skb); } } @@ -93,7 +93,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list) while ((skb = skb_dequeue((list))) != NULL) { int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, - atomic_read(&skb->users), n, here); + refcount_read(&skb->users), n, here); kfree_skb(skb); } } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index eb0e9bab54c1..d6e97115500b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -340,7 +340,7 @@ META_COLLECTOR(int_sk_refcnt) *err = -1; return; } - dst->value = atomic_read(&skb->sk->sk_refcnt); + dst->value = refcount_read(&skb->sk->sk_refcnt); } META_COLLECTOR(int_sk_rcvbuf) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 5d95401bbc02..43b94c7b69bd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, return sch; } /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ - ops->destroy(sch); + if (ops->destroy) + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index de162592eee0..572fe2584e48 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -498,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data) ATM_SKB(skb)->vcc = flow->vcc; memcpy(skb_push(skb, flow->hdr_len), flow->hdr, flow->hdr_len); - atomic_add(skb->truesize, + refcount_add(skb->truesize, &sk_atm(flow->vcc)->sk_wmem_alloc); /* atm.atm_options are already set by atm_tc_enqueue */ flow->vcc->send(flow->vcc, skb); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 757be416f778..fa4f530ab7e1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct net *net = sock_net(sk); struct sctp_sock *sp; - sctp_paramhdr_t *p; + struct sctp_paramhdr *p; int i; /* Retrieve the SCTP per socket area. */ @@ -284,9 +284,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a ntohs(ep->auth_chunk_list->param_hdr.length)); /* Get the AUTH random number for this association */ - p = (sctp_paramhdr_t *)asoc->c.auth_random; + p = (struct sctp_paramhdr *)asoc->c.auth_random; p->type = SCTP_PARAM_RANDOM; - p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH); + p->length = htons(sizeof(*p) + SCTP_AUTH_RANDOM_LENGTH); get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH); return asoc; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index f99d4855d3de..8ffa5985cd6e 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -538,7 +538,8 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc) if (!hmacs) return NULL; - n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + n_elt = (ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr)) >> 1; for (i = 0; i < n_elt; i++) { id = ntohs(hmacs->hmac_ids[i]); @@ -589,7 +590,8 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, return 0; hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs; - n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1; + n_elt = (ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr)) >> 1; return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id); } @@ -612,8 +614,8 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, if (asoc->default_hmac_id) return; - n_params = (ntohs(hmacs->param_hdr.length) - - sizeof(sctp_paramhdr_t)) >> 1; + n_params = (ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr)) >> 1; ep = asoc->ep; for (i = 0; i < n_params; i++) { id = ntohs(hmacs->hmac_ids[i]); @@ -632,7 +634,7 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, /* Check to see if the given chunk is supposed to be authenticated */ -static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) +static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param) { unsigned short len; int found = 0; @@ -641,7 +643,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) if (!param || param->param_hdr.length == 0) return 0; - len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t); + len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr); /* SCTP-AUTH, Section 3.2 * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH @@ -668,7 +670,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) } /* Check if peer requested that this chunk is authenticated */ -int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc) { if (!asoc) return 0; @@ -680,7 +682,7 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) } /* Check if we requested that peer authenticate this chunk. */ -int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) +int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) { if (!asoc) return 0; @@ -775,7 +777,7 @@ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id) /* Check if we can add this chunk to the array */ param_len = ntohs(p->param_hdr.length); - nchunks = param_len - sizeof(sctp_paramhdr_t); + nchunks = param_len - sizeof(struct sctp_paramhdr); if (nchunks == SCTP_NUM_CHUNK_TYPES) return -EINVAL; @@ -812,9 +814,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, return -EINVAL; for (i = 0; i < hmacs->shmac_num_idents; i++) - ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]); - ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + - hmacs->shmac_num_idents * sizeof(__u16)); + ep->auth_hmacs_list->hmac_ids[i] = + htons(hmacs->shmac_idents[i]); + ep->auth_hmacs_list->param_hdr.length = + htons(sizeof(struct sctp_paramhdr) + + hmacs->shmac_num_idents * sizeof(__u16)); return 0; } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3dcd0ecf3d99..efbc31877804 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -90,12 +90,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, */ auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; auth_hmacs->param_hdr.length = - htons(sizeof(sctp_paramhdr_t) + 2); + htons(sizeof(struct sctp_paramhdr) + 2); auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); /* Initialize the CHUNKS parameter */ auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; - auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t)); + auth_chunks->param_hdr.length = + htons(sizeof(struct sctp_paramhdr)); /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks @@ -104,7 +105,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = - htons(sizeof(sctp_paramhdr_t) + 2); + htons(sizeof(struct sctp_paramhdr) + 2); } } @@ -268,16 +269,14 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) memset(ep->secret_key, 0, sizeof(ep->secret_key)); - /* Give up our hold on the sock. */ sk = ep->base.sk; - if (sk != NULL) { - /* Remove and free the port */ - if (sctp_sk(sk)->bind_hash) - sctp_put_port(sk); + /* Remove and free the port */ + if (sctp_sk(sk)->bind_hash) + sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - sock_put(sk); - } + sctp_sk(sk)->ep = NULL; + /* Give up our hold on the sock */ + sock_put(sk); kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); diff --git a/net/sctp/input.c b/net/sctp/input.c index ba9ad32fc447..41eb2ec10460 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -663,19 +663,19 @@ out_unlock: */ static int sctp_rcv_ootb(struct sk_buff *skb) { - sctp_chunkhdr_t *ch, _ch; + struct sctp_chunkhdr *ch, _ch; int ch_end, offset = 0; /* Scan through all the chunks in the packet. */ do { /* Make sure we have at least the header there */ - if (offset + sizeof(sctp_chunkhdr_t) > skb->len) + if (offset + sizeof(_ch) > skb->len) break; ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); /* Break out if chunk length is less then minimal. */ - if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + if (ntohs(ch->length) < sizeof(_ch)) break; ch_end = offset + SCTP_PAD4(ntohs(ch->length)); @@ -1051,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, union sctp_addr *paddr = &addr; struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; struct sctp_af *af; /* @@ -1070,7 +1070,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, /* Find the start of the TLVs and the end of the chunk. This is * the region we search for address parameters. */ - init = (sctp_init_chunk_t *)skb->data; + init = (struct sctp_init_chunk *)skb->data; /* Walk the parameters looking for embedded addresses. */ sctp_walk_params(params, init, init_hdr.params) { @@ -1106,7 +1106,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, */ static struct sctp_association *__sctp_rcv_asconf_lookup( struct net *net, - sctp_chunkhdr_t *ch, + struct sctp_chunkhdr *ch, const union sctp_addr *laddr, __be16 peer_port, struct sctp_transport **transportp) @@ -1144,7 +1144,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, struct sctp_transport **transportp) { struct sctp_association *asoc = NULL; - sctp_chunkhdr_t *ch; + struct sctp_chunkhdr *ch; int have_auth = 0; unsigned int chunk_num = 1; __u8 *ch_end; @@ -1152,10 +1152,10 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, /* Walk through the chunks looking for AUTH or ASCONF chunks * to help us find the association. */ - ch = (sctp_chunkhdr_t *) skb->data; + ch = (struct sctp_chunkhdr *)skb->data; do { /* Break out if chunk length is less then minimal. */ - if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + if (ntohs(ch->length) < sizeof(*ch)) break; ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); @@ -1192,7 +1192,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (asoc) break; - ch = (sctp_chunkhdr_t *) ch_end; + ch = (struct sctp_chunkhdr *)ch_end; chunk_num++; } while (ch_end < skb_tail_pointer(skb)); @@ -1210,7 +1210,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, const union sctp_addr *laddr, struct sctp_transport **transportp) { - sctp_chunkhdr_t *ch; + struct sctp_chunkhdr *ch; /* We do not allow GSO frames here as we need to linearize and * then cannot guarantee frame boundaries. This shouldn't be an @@ -1220,7 +1220,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) return NULL; - ch = (sctp_chunkhdr_t *) skb->data; + ch = (struct sctp_chunkhdr *)skb->data; /* The code below will attempt to walk the chunk and extract * parameter information. Before we do that, we need to verify diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index f731de3e8428..48392552ee7c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -99,7 +99,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) { struct sctp_chunk *chunk; - sctp_chunkhdr_t *ch = NULL; + struct sctp_chunkhdr *ch = NULL; chunk = queue->in_progress; /* If there is no more chunks in this packet, say so */ @@ -108,7 +108,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) chunk->pdiscard) return NULL; - ch = (sctp_chunkhdr_t *)chunk->chunk_end; + ch = (struct sctp_chunkhdr *)chunk->chunk_end; return ch; } @@ -122,7 +122,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue) struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) { struct sctp_chunk *chunk; - sctp_chunkhdr_t *ch = NULL; + struct sctp_chunkhdr *ch = NULL; /* The assumption is that we are safe to process the chunks * at this time. @@ -151,7 +151,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk = queue->in_progress = NULL; } else { /* Nothing to do. Next chunk in the packet, please. */ - ch = (sctp_chunkhdr_t *) chunk->chunk_end; + ch = (struct sctp_chunkhdr *)chunk->chunk_end; /* Force chunk->skb->data to chunk->chunk_end. */ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); /* We are guaranteed to pull a SCTP header. */ @@ -195,7 +195,7 @@ next_chunk: new_skb: /* This is the first chunk in the packet. */ - ch = (sctp_chunkhdr_t *) chunk->skb->data; + ch = (struct sctp_chunkhdr *)chunk->skb->data; chunk->singleton = 1; chunk->data_accepted = 0; chunk->pdiscard = 0; @@ -214,11 +214,10 @@ new_skb: chunk->chunk_hdr = ch; chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); - skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); + skb_pull(chunk->skb, sizeof(*ch)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ - if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < - skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { diff --git a/net/sctp/output.c b/net/sctp/output.c index 89cee1482d35..9d8504985744 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) * therefore only reserve a single byte to keep socket around until * the packet has been transmitted. */ - atomic_inc(&sk->sk_wmem_alloc); + refcount_inc(&sk->sk_wmem_alloc); } static int sctp_packet_pack(struct sctp_packet *packet, @@ -723,8 +723,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, /* Check whether this chunk and all the rest of pending data will fit * or delay in hopes of bundling a full sized packet. */ - if (chunk->skb->len + q->out_qlen > - transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4) + if (chunk->skb->len + q->out_qlen > transport->pathmtu - + packet->overhead - sizeof(struct sctp_data_chunk) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 20299df163b9..e8762702a313 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1102,7 +1102,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk", ntohl(chunk->subh.data_hdr->tsn), chunk->skb ? chunk->skb->head : NULL, chunk->skb ? - atomic_read(&chunk->skb->users) : -1); + refcount_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 8e34db56bc1d..26b4be6b4172 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -363,7 +363,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc->stream.outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, - atomic_read(&sk->sk_wmem_alloc), + refcount_read(&sk->sk_wmem_alloc), sk->sk_wmem_queued, sk->sk_sndbuf, sk->sk_rcvbuf); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 4b1967997c16..3af4dd024ec0 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -217,7 +217,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, { struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep = asoc->ep; - sctp_inithdr_t init; + struct sctp_inithdr init; union sctp_params addrs; size_t chunksize; struct sctp_chunk *retval = NULL; @@ -229,7 +229,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_supported_ext_param_t ext_param; int num_ext = 0; __u8 extensions[3]; - sctp_paramhdr_t *auth_chunks = NULL, + struct sctp_paramhdr *auth_chunks = NULL, *auth_hmacs = NULL; /* RFC 2960 3.3.2 Initiation (INIT) (1) @@ -286,14 +286,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += sizeof(asoc->c.auth_random); /* Add HMACS parameter length if any were defined */ - auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs; if (auth_hmacs->length) chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ - auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks; if (auth_chunks->length) chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else @@ -385,7 +385,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, const struct sctp_chunk *chunk, gfp_t gfp, int unkparam_len) { - sctp_inithdr_t initack; + struct sctp_inithdr initack; struct sctp_chunk *retval; union sctp_params addrs; struct sctp_sock *sp; @@ -397,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_supported_ext_param_t ext_param; int num_ext = 0; __u8 extensions[3]; - sctp_paramhdr_t *auth_chunks = NULL, + struct sctp_paramhdr *auth_chunks = NULL, *auth_hmacs = NULL, *auth_random = NULL; @@ -448,16 +448,16 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, chunksize += sizeof(aiparam); if (asoc->peer.auth_capable) { - auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; + auth_random = (struct sctp_paramhdr *)asoc->c.auth_random; chunksize += ntohs(auth_random->length); - auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; + auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs; if (auth_hmacs->length) chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; - auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; + auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks; if (auth_chunks->length) chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else @@ -1085,18 +1085,18 @@ struct sctp_chunk *sctp_make_abort_violation( struct sctp_chunk *retval; struct sctp_paramhdr phdr; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen - + sizeof(sctp_paramhdr_t)); + retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen + + sizeof(phdr)); if (!retval) goto end; - sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen - + sizeof(sctp_paramhdr_t)); + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen + + sizeof(phdr)); phdr.type = htons(chunk->chunk_hdr->type); phdr.length = chunk->chunk_hdr->length; sctp_addto_chunk(retval, paylen, payload); - sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr); + sctp_addto_param(retval, sizeof(phdr), &phdr); end: return retval; @@ -1110,16 +1110,16 @@ struct sctp_chunk *sctp_make_violation_paramlen( struct sctp_chunk *retval; static const char error[] = "The following parameter had invalid length:"; size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + - sizeof(sctp_paramhdr_t); + sizeof(*param); retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) goto nodata; sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); + sizeof(error) + sizeof(*param)); sctp_addto_chunk(retval, sizeof(error), error); - sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + sctp_addto_param(retval, sizeof(*param), param); nodata: return retval; @@ -1379,20 +1379,20 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, gfp_t gfp) { struct sctp_chunk *retval; - sctp_chunkhdr_t *chunk_hdr; + struct sctp_chunkhdr *chunk_hdr; struct sk_buff *skb; struct sock *sk; /* No need to allocate LL here, as this is only a chunk. */ - skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp); if (!skb) goto nodata; /* Make room for the chunk header. */ - chunk_hdr = skb_put(skb, sizeof(sctp_chunkhdr_t)); + chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr)); chunk_hdr->type = type; chunk_hdr->flags = flags; - chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t)); + chunk_hdr->length = htons(sizeof(*chunk_hdr)); sk = asoc ? asoc->base.sk : NULL; retval = sctp_chunkify(skb, asoc, sk, gfp); @@ -1402,7 +1402,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, } retval->chunk_hdr = chunk_hdr; - retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr); + retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr); /* Determine if the chunk needs to be authenticated */ if (sctp_auth_send_cid(type, asoc)) @@ -1614,7 +1614,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Header size is static data prior to the actual cookie, including * any padding. */ - headersize = sizeof(sctp_paramhdr_t) + + headersize = sizeof(struct sctp_paramhdr) + (sizeof(struct sctp_signed_cookie) - sizeof(struct sctp_cookie)); bodysize = sizeof(struct sctp_cookie) @@ -1710,7 +1710,7 @@ struct sctp_association *sctp_unpack_cookie( /* Header size is static data prior to the actual cookie, including * any padding. */ - headersize = sizeof(sctp_chunkhdr_t) + + headersize = sizeof(struct sctp_chunkhdr) + (sizeof(struct sctp_signed_cookie) - sizeof(struct sctp_cookie)); bodysize = ntohs(chunk->chunk_hdr->length) - headersize; @@ -1882,7 +1882,7 @@ struct __sctp_missing { * Report a missing mandatory parameter. */ static int sctp_process_missing_param(const struct sctp_association *asoc, - sctp_param_t paramtype, + enum sctp_param paramtype, struct sctp_chunk *chunk, struct sctp_chunk **errp) { @@ -1975,7 +1975,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, static int sctp_verify_ext_param(struct net *net, union sctp_params param) { - __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int have_auth = 0; int have_asconf = 0; int i; @@ -2010,7 +2010,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { struct net *net = sock_net(asoc->base.sk); - __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int i; for (i = 0; i < num_ext; i++) { @@ -2123,7 +2123,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, union sctp_params param, - sctp_cid_t cid, + enum sctp_cid cid, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk) { @@ -2180,7 +2180,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, * cause 'Protocol Violation'. */ if (SCTP_AUTH_RANDOM_LENGTH != - ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) { + ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) { sctp_process_inv_paramlength(asoc, param.p, chunk, err_chunk); retval = SCTP_IERROR_ABORT; @@ -2208,7 +2208,8 @@ static sctp_ierror_t sctp_verify_param(struct net *net, goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; - n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1; + n_elt = (ntohs(param.p->length) - + sizeof(struct sctp_paramhdr)) >> 1; /* SCTP-AUTH: Section 6.1 * The HMAC algorithm based on SHA-1 MUST be supported and @@ -2240,9 +2241,9 @@ fallthrough: /* Verify the INIT packet before we process it. */ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, - const struct sctp_association *asoc, sctp_cid_t cid, - sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, - struct sctp_chunk **errp) + const struct sctp_association *asoc, enum sctp_cid cid, + struct sctp_init_chunk *peer_init, + struct sctp_chunk *chunk, struct sctp_chunk **errp) { union sctp_params param; bool has_cookie = false; @@ -2306,7 +2307,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, */ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, - sctp_init_chunk_t *peer_init, gfp_t gfp) + struct sctp_init_chunk *peer_init, gfp_t gfp) { struct net *net = sock_net(asoc->base.sk); union sctp_params param; @@ -2565,7 +2566,7 @@ do_addr_param: asoc->peer.ipv4_address = 1; /* Cycle through address types; avoid divide by 0. */ - sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); if (sat) sat /= sizeof(__u16); @@ -2592,7 +2593,7 @@ do_addr_param: case SCTP_PARAM_STATE_COOKIE: asoc->peer.cookie_len = - ntohs(param.p->length) - sizeof(sctp_paramhdr_t); + ntohs(param.p->length) - sizeof(struct sctp_paramhdr); asoc->peer.cookie = param.cookie->body; break; @@ -3176,7 +3177,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, return false; length = ntohs(param.addip->param_hdr.length); if (length < sizeof(sctp_addip_param_t) + - sizeof(sctp_paramhdr_t)) + sizeof(**errp)) return false; break; case SCTP_PARAM_SUCCESS_REPORT: @@ -3218,7 +3219,8 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, int chunk_len; __u32 serial; - chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); + chunk_len = ntohs(asconf->chunk_hdr->length) - + sizeof(struct sctp_chunkhdr); hdr = (sctp_addiphdr_t *)asconf->skb->data; serial = ntohl(hdr->serial); @@ -3364,7 +3366,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, err_code = SCTP_ERROR_REQ_REFUSED; asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t); + sizeof(struct sctp_chunkhdr); /* Skip the addiphdr from the asconf_ack chunk and store a pointer to * the first asconf_ack parameter. diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index dfe1fcb520ba..d6e5e9e0fd6d 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -647,7 +647,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands, static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_init_chunk_t *peer_init, + struct sctp_init_chunk *peer_init, gfp_t gfp) { int error; @@ -955,9 +955,10 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds, switch (err_hdr->cause) { case SCTP_ERROR_UNKNOWN_CHUNK: { - sctp_chunkhdr_t *unk_chunk_hdr; + struct sctp_chunkhdr *unk_chunk_hdr; - unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable; + unk_chunk_hdr = (struct sctp_chunkhdr *) + err_hdr->variable; switch (unk_chunk_hdr->type) { /* ADDIP 4.1 A9) If the peer responds to an ASCONF with * an ERROR chunk reporting that it did not recognized diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8feff96a5bef..b2a74c3823ee 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -235,7 +235,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net, return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -345,7 +345,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, * error, but since we don't have an association, we'll * just discard the packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the INIT is coming toward a closing socket, we'll send back @@ -360,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, - (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, + (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. @@ -368,9 +368,9 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t), + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); sctp_chunk_free(err_chunk); @@ -389,10 +389,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, } /* Grab the INIT header. */ - chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data; + chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Tag the variable length parameters. */ - chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t)); + chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC); if (!new_asoc) @@ -405,7 +405,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* The call, sctp_process_init(), can fail on memory allocation. */ if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), - (sctp_init_chunk_t *)chunk->chunk_hdr, + (struct sctp_init_chunk *)chunk->chunk_hdr, GFP_ATOMIC)) goto nomem_init; @@ -417,7 +417,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, len = 0; if (err_chunk) len = ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t); + sizeof(struct sctp_chunkhdr); repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) @@ -437,7 +437,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, */ unk_param = (sctp_unrecognized_param_t *) ((__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" * parameter type. */ @@ -503,7 +503,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - sctp_init_chunk_t *initchunk; + struct sctp_init_chunk *initchunk; struct sctp_chunk *err_chunk; struct sctp_packet *packet; @@ -522,12 +522,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ - chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; + chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, - (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, + (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { sctp_error_t error = SCTP_ERROR_NO_RESOURCE; @@ -540,9 +540,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t), + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); sctp_chunk_free(err_chunk); @@ -576,9 +576,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, /* Tag the variable length parameters. Note that we never * convert the parameters in an INIT chunk. */ - chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t)); + chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); - initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr; + initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr; sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, SCTP_PEER_INIT(initchunk)); @@ -653,7 +653,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, { struct sctp_chunk *chunk = arg; struct sctp_association *new_asoc; - sctp_init_chunk_t *peer_init; + struct sctp_init_chunk *peer_init; struct sctp_chunk *repl; struct sctp_ulpevent *ev, *ai_ev = NULL; int error = 0; @@ -673,7 +673,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, * chunk header. More detailed verification is done * in sctp_unpack_cookie(). */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the endpoint is not listening or if the number of associations @@ -691,7 +691,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t))) + sizeof(struct sctp_chunkhdr))) goto nomem; /* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint @@ -770,9 +770,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; auth.sctp_hdr = chunk->sctp_hdr; - auth.chunk_hdr = skb_push(chunk->auth_chunk, - sizeof(sctp_chunkhdr_t)); - skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); + auth.chunk_hdr = (struct sctp_chunkhdr *) + skb_push(chunk->auth_chunk, + sizeof(struct sctp_chunkhdr)); + skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); @@ -886,7 +887,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1080,7 +1081,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - sctp_paramhdr_t *param_hdr; + struct sctp_paramhdr *param_hdr; struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; size_t paylen = 0; @@ -1097,9 +1098,9 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, * respond with a HEARTBEAT ACK that contains the Heartbeat * Information field copied from the received HEARTBEAT chunk. */ - chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data; - param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr; - paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); + chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data; + param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr; + paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); if (ntohs(param_hdr->length) > paylen) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, @@ -1164,7 +1165,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) + sizeof(sctp_sender_hb_info_t))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1449,19 +1450,19 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * In this case, we generate a protocol violation since we have * an association established. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ - chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; + chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; /* Tag the variable length parameters. */ - chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t)); + chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, - (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, + (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. @@ -1469,9 +1470,9 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (err_chunk) { packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t), + sizeof(struct sctp_chunkhdr), ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, @@ -1508,7 +1509,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * place (local tie-tag and per tie-tag) within the state cookie. */ if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk), - (sctp_init_chunk_t *)chunk->chunk_hdr, + (struct sctp_init_chunk *)chunk->chunk_hdr, GFP_ATOMIC)) goto nomem; @@ -1535,7 +1536,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( len = 0; if (err_chunk) { len = ntohs(err_chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t); + sizeof(struct sctp_chunkhdr); } repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); @@ -1556,7 +1557,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( */ unk_param = (sctp_unrecognized_param_t *) ((__u8 *)(err_chunk->chunk_hdr) + - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" * parameter type. */ @@ -1729,7 +1730,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - sctp_init_chunk_t *peer_init; + struct sctp_init_chunk *peer_init; struct sctp_ulpevent *ev; struct sctp_chunk *repl; struct sctp_chunk *err; @@ -1844,7 +1845,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - sctp_init_chunk_t *peer_init; + struct sctp_init_chunk *peer_init; struct sctp_chunk *repl; /* new_asoc is a brand-new association, so these are not yet @@ -2044,7 +2045,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, * enough for the chunk header. Cookie length verification is * done later. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -2053,7 +2054,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, */ chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data; if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t))) + sizeof(struct sctp_chunkhdr))) goto nomem; /* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie @@ -2806,7 +2807,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, struct sctp_chunk *reply; /* Make sure that the chunk has a valid length */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -2989,7 +2990,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3009,7 +3010,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, return SCTP_DISPOSITION_ABORT; case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, - (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); + (u8 *)chunk->subh.data_hdr, + sizeof(struct sctp_datahdr)); default: BUG(); } @@ -3107,7 +3109,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3123,7 +3125,8 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, return SCTP_DISPOSITION_ABORT; case SCTP_IERROR_PROTO_VIOLATION: return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, - (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); + (u8 *)chunk->subh.data_hdr, + sizeof(struct sctp_datahdr)); default: BUG(); } @@ -3358,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 10.2 H) SHUTDOWN COMPLETE notification @@ -3435,7 +3438,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, { struct sctp_chunk *chunk = arg; struct sk_buff *skb = chunk->skb; - sctp_chunkhdr_t *ch; + struct sctp_chunkhdr *ch; sctp_errhdr_t *err; __u8 *ch_end; int ootb_shut_ack = 0; @@ -3443,10 +3446,10 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ - if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) + if (ntohs(ch->length) < sizeof(*ch)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3487,7 +3490,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } } - ch = (sctp_chunkhdr_t *) ch_end; + ch = (struct sctp_chunkhdr *)ch_end; } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) @@ -3560,7 +3563,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, /* If the chunk length is invalid, we don't want to process * the reset of the packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* We need to discard the rest of the packet to prevent @@ -3591,7 +3594,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, struct sctp_chunk *chunk = arg; /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4256,7 +4259,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, { struct sctp_chunk *unk_chunk = arg; struct sctp_chunk *err_chunk; - sctp_chunkhdr_t *hdr; + struct sctp_chunkhdr *hdr; pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk); @@ -4267,7 +4270,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. */ - if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(unk_chunk, sizeof(*hdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4340,7 +4343,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4405,7 +4408,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net, struct sctp_chunk *chunk = arg; /* Make sure that the chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -6121,9 +6124,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT: { - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; - init = (sctp_init_chunk_t *)chunk->chunk_hdr; + init = (struct sctp_init_chunk *)chunk->chunk_hdr; vtag = ntohl(init->init_hdr.init_tag); break; } @@ -6196,7 +6199,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands) { - sctp_datahdr_t *data_hdr; + struct sctp_datahdr *data_hdr; struct sctp_chunk *err; size_t datalen; sctp_verb_t deliver; @@ -6209,8 +6212,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, u16 sid; u8 ordered = 0; - data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); + data_hdr = (struct sctp_datahdr *)chunk->skb->data; + chunk->subh.data_hdr = data_hdr; + skb_pull(chunk->skb, sizeof(*data_hdr)); tsn = ntohl(data_hdr->tsn); pr_debug("%s: TSN 0x%x\n", __func__, tsn); @@ -6258,7 +6262,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * Actually, allow a little bit of overflow (up to a MTU). */ datalen = ntohs(chunk->chunk_hdr->length); - datalen -= sizeof(sctp_data_chunk_t); + datalen -= sizeof(struct sctp_data_chunk); deliver = SCTP_CMD_CHUNK_ULP; diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 419b18ebb056..3e958c1c4b95 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -53,7 +53,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - sctp_cid_t cid, + enum sctp_cid cid, sctp_state_t state); @@ -968,7 +968,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S }; static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - sctp_cid_t cid, + enum sctp_cid cid, sctp_state_t state) { if (state > SCTP_STATE_MAX) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7b6e20eb9451..1db478e34520 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sk_buff) + sizeof(struct sctp_chunk); - atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); sk->sk_wmem_queued += chunk->skb->truesize; sk_mem_charge(sk, chunk->skb->truesize); } @@ -4933,11 +4933,47 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) } EXPORT_SYMBOL(sctp_do_peeloff); +static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *peeloff, + struct file **newfile, unsigned flags) +{ + struct socket *newsock; + int retval; + + retval = sctp_do_peeloff(sk, peeloff->associd, &newsock); + if (retval < 0) + goto out; + + /* Map the socket to an unused fd that can be returned to the user. */ + retval = get_unused_fd_flags(flags & SOCK_CLOEXEC); + if (retval < 0) { + sock_release(newsock); + goto out; + } + + *newfile = sock_alloc_file(newsock, 0, NULL); + if (IS_ERR(*newfile)) { + put_unused_fd(retval); + sock_release(newsock); + retval = PTR_ERR(*newfile); + *newfile = NULL; + return retval; + } + + pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, + retval); + + peeloff->sd = retval; + + if (flags & SOCK_NONBLOCK) + (*newfile)->f_flags |= O_NONBLOCK; +out: + return retval; +} + static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen) { sctp_peeloff_arg_t peeloff; - struct socket *newsock; - struct file *newfile; + struct file *newfile = NULL; int retval = 0; if (len < sizeof(sctp_peeloff_arg_t)) @@ -4946,26 +4982,44 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval if (copy_from_user(&peeloff, optval, len)) return -EFAULT; - retval = sctp_do_peeloff(sk, peeloff.associd, &newsock); + retval = sctp_getsockopt_peeloff_common(sk, &peeloff, &newfile, 0); if (retval < 0) goto out; - /* Map the socket to an unused fd that can be returned to the user. */ - retval = get_unused_fd_flags(0); - if (retval < 0) { - sock_release(newsock); - goto out; + /* Return the fd mapped to the new socket. */ + if (put_user(len, optlen)) { + fput(newfile); + put_unused_fd(retval); + return -EFAULT; } - newfile = sock_alloc_file(newsock, 0, NULL); - if (IS_ERR(newfile)) { + if (copy_to_user(optval, &peeloff, len)) { + fput(newfile); put_unused_fd(retval); - sock_release(newsock); - return PTR_ERR(newfile); + return -EFAULT; } + fd_install(retval, newfile); +out: + return retval; +} - pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, - retval); +static int sctp_getsockopt_peeloff_flags(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + sctp_peeloff_flags_arg_t peeloff; + struct file *newfile = NULL; + int retval = 0; + + if (len < sizeof(sctp_peeloff_flags_arg_t)) + return -EINVAL; + len = sizeof(sctp_peeloff_flags_arg_t); + if (copy_from_user(&peeloff, optval, len)) + return -EFAULT; + + retval = sctp_getsockopt_peeloff_common(sk, &peeloff.p_arg, + &newfile, peeloff.flags); + if (retval < 0) + goto out; /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { @@ -4973,7 +5027,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval put_unused_fd(retval); return -EFAULT; } - peeloff.sd = retval; + if (copy_to_user(optval, &peeloff, len)) { fput(newfile); put_unused_fd(retval); @@ -6033,7 +6087,8 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, return -EACCES; hmacs = ep->auth_hmacs_list; - data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t); + data_len = ntohs(hmacs->param_hdr.length) - + sizeof(struct sctp_paramhdr); if (len < sizeof(struct sctp_hmacalgo) + data_len) return -EINVAL; @@ -6117,7 +6172,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, goto num; /* See if the user provided enough room for all the data */ - num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t); + num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < num_chunks) return -EINVAL; @@ -6165,7 +6220,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, if (!ch) goto num; - num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t); + num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr); if (len < sizeof(struct sctp_authchunks) + num_chunks) return -EINVAL; @@ -6758,6 +6813,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_SOCKOPT_PEELOFF: retval = sctp_getsockopt_peeloff(sk, len, optval, optlen); break; + case SCTP_SOCKOPT_PEELOFF_FLAGS: + retval = sctp_getsockopt_peeloff_flags(sk, len, optval, optlen); + break; case SCTP_PEER_ADDR_PARAMS: retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); @@ -7563,7 +7621,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (flags & MSG_PEEK) { skb = skb_peek(&sk->sk_receive_queue); if (skb) - atomic_inc(&skb->users); + refcount_inc(&skb->users); } else { skb = __skb_dequeue(&sk->sk_receive_queue); } @@ -7684,7 +7742,7 @@ static void sctp_wfree(struct sk_buff *skb) sizeof(struct sk_buff) + sizeof(struct sctp_chunk); - atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc)); /* * This undoes what is done via sctp_set_owner_w and sk_mem_charge diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 82e6d40052a8..63ea15503714 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -304,7 +304,7 @@ out: return retval; } -static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( +static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( struct sctp_association *asoc, __u32 resp_seq, __be16 type) { @@ -749,7 +749,7 @@ struct sctp_chunk *sctp_process_strreset_resp( struct sctp_strreset_resp *resp = param.v; struct sctp_transport *t; __u16 i, nums, flags = 0; - sctp_paramhdr_t *req; + struct sctp_paramhdr *req; __u32 result; req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0); diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 17854fb0e512..5f86c5062a98 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -158,7 +158,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( /* Trim the buffer to the right length. */ skb_trim(skb, sizeof(struct sctp_assoc_change) + ntohs(chunk->chunk_hdr->length) - - sizeof(sctp_chunkhdr_t)); + sizeof(struct sctp_chunkhdr)); } else { event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change), MSG_NOTIFICATION, gfp); diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 25f7e4140566..0225d62a869f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1090,7 +1090,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (chunk) { needed = ntohs(chunk->chunk_hdr->length); - needed -= sizeof(sctp_data_chunk_t); + needed -= sizeof(struct sctp_data_chunk); } else needed = SCTP_DEFAULT_MAXWINDOW; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1b92b72e812f..101e3597338f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2313,7 +2313,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1a0c961f4ffe..b9ee766054f6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(unix_peer_get); static inline void unix_release_addr(struct unix_address *addr) { - if (atomic_dec_and_test(&addr->refcnt)) + if (refcount_dec_and_test(&addr->refcnt)) kfree(addr); } @@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && - (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) @@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { @@ -864,7 +864,7 @@ static int unix_autobind(struct socket *sock) goto out; addr->name->sun_family = AF_UNIX; - atomic_set(&addr->refcnt, 1); + refcount_set(&addr->refcnt, 1); retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); @@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; addr->hash = hash ^ sk->sk_type; - atomic_set(&addr->refcnt, 1); + refcount_set(&addr->refcnt, 1); if (sun_path[0]) { addr->hash = UNIX_HASH_SIZE; @@ -1335,7 +1335,7 @@ restart: /* copy address information from listening to new sock*/ if (otheru->addr) { - atomic_inc(&otheru->addr->refcnt); + refcount_inc(&otheru->addr->refcnt); newu->addr = otheru->addr; } if (otheru->path.dentry) { @@ -2033,7 +2033,7 @@ alloc_skb: skb->len += size; skb->data_len += size; skb->truesize += size; - atomic_add(size, &sk->sk_wmem_alloc); + refcount_add(size, &sk->sk_wmem_alloc); if (newskb) { err = unix_scm_to_skb(&scm, skb, false); @@ -2847,7 +2847,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", s, - atomic_read(&s->sk_refcnt), + refcount_read(&s->sk_refcnt), 0, s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, s->sk_type, diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index abf81b329dc1..55b2ac300995 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,8 +4,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o -obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o + xfrm_sysctl.o xfrm_replay.o xfrm_device.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6d4a60d1bf19..5f7e8bfa0c2d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -22,6 +22,7 @@ #include <net/xfrm.h> #include <linux/notifier.h> +#ifdef CONFIG_XFRM_OFFLOAD int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) { int err; @@ -137,6 +138,7 @@ ok: return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); +#endif static int xfrm_dev_register(struct net_device *dev) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a3dc7ab0b7ed..4706df612170 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); - - if (cnt) - xfrm_garbage_collect(net); - return err; } EXPORT_SYMBOL(xfrm_policy_flush); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 6197c7231bc7..2be4c6af008a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } + xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index e7ec9b8539a5..9c650589e80f 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example +hostprogs-y += load_sock_ops # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o +load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o @@ -111,6 +113,12 @@ always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o always += test_map_in_map_kern.o always += cookie_uid_helper_example.o +always += tcp_synrto_kern.o +always += tcp_rwnd_kern.o +always += tcp_bufs_kern.o +always += tcp_cong_kern.o +always += tcp_iw_kern.o +always += tcp_clamp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -130,6 +138,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf +HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f4840b8bb8f9..d50ac342dc92 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -60,6 +60,9 @@ static unsigned long long (*bpf_get_prandom_u32)(void) = (void *) BPF_FUNC_get_prandom_u32; static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; +static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, + int optlen) = + (void *) BPF_FUNC_setsockopt; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index a91c57dd8571..a4be7cfa6519 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_perf_event = strncmp(event, "perf_event", 10) == 0; bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + bool is_sockops = strncmp(event, "sockops", 7) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_CGROUP_SKB; } else if (is_cgroup_sk) { prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + } else if (is_sockops) { + prog_type = BPF_PROG_TYPE_SOCK_OPS; } else { printf("Unknown event '%s'\n", event); return -1; @@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; - if (is_socket) { - event += 6; + if (is_socket || is_sockops) { + if (is_socket) + event += 6; + else + event += 7; if (*event != '/') return 0; event++; @@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0 || - memcmp(shname, "cgroup/", 7) == 0) + memcmp(shname, "cgroup/", 7) == 0 || + memcmp(shname, "sockops", 7) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c new file mode 100644 index 000000000000..e5da6cf71a3e --- /dev/null +++ b/samples/bpf/load_sock_ops.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/unistd.h> + +static void usage(char *pname) +{ + printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname); + printf("\tLoad and attach a sock_ops program to the specified " + "cgroup\n"); + printf("\tIf \"-l\" is used, the program will continue to run\n"); + printf("\tprinting the BPF log buffer\n"); + printf("\tIf the specified filename does not end in \".o\", it\n"); + printf("\tappends \"_kern.o\" to the name\n"); + printf("\n"); + printf(" %s -r <cg-path>\n", pname); + printf("\tDetaches the currently attached sock_ops program\n"); + printf("\tfrom the specified cgroup\n"); + printf("\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int logFlag = 0; + int error = 0; + char *cg_path; + char fn[500]; + char *prog; + int cg_fd; + + if (argc < 3) + usage(argv[0]); + + if (!strcmp(argv[1], "-r")) { + cg_path = argv[2]; + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + if (error) { + printf("ERROR: bpf_prog_detach: %d (%s)\n", + error, strerror(errno)); + return 2; + } + return 0; + } else if (!strcmp(argv[1], "-h")) { + usage(argv[0]); + } else if (!strcmp(argv[1], "-l")) { + logFlag = 1; + if (argc < 4) + usage(argv[0]); + } + + prog = argv[argc - 1]; + cg_path = argv[argc - 2]; + if (strlen(prog) > 480) { + fprintf(stderr, "ERROR: program name too long (> 480 chars)\n"); + return 3; + } + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + + if (!strcmp(prog + strlen(prog)-2, ".o")) + strcpy(fn, prog); + else + sprintf(fn, "%s_kern.o", prog); + if (logFlag) + printf("loading bpf file:%s\n", fn); + if (load_bpf_file(fn)) { + printf("ERROR: load_bpf_file failed for: %s\n", fn); + printf("%s", bpf_log_buf); + return 4; + } + if (logFlag) + printf("TCP BPF Loaded %s\n", fn); + + error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (error) { + printf("ERROR: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + return 5; + } else if (logFlag) { + read_trace_pipe(); + } + + return error; +} diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index b5524d417eb5..877ecf8fc5ac 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -8,6 +8,10 @@ #include <arpa/inet.h> #include <sys/resource.h> +#define PARSE_IP 3 +#define PARSE_IP_PROG_FD (prog_fd[0]) +#define PROG_ARRAY_FD (map_fd[0]) + struct bpf_flow_keys { __be32 src; __be32 dst; @@ -28,7 +32,9 @@ int main(int argc, char **argv) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; - int i, sock; + int i, sock, err, id, key = PARSE_IP; + struct bpf_prog_info info = {}; + uint32_t info_len = sizeof(info); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); setrlimit(RLIMIT_MEMLOCK, &r); @@ -38,6 +44,13 @@ int main(int argc, char **argv) return 1; } + /* Test fd array lookup which returns the id of the bpf_prog */ + err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len); + assert(!err); + err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id); + assert(!err); + assert(id == info.id); + sock = open_raw_sock("lo"); assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4], diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c new file mode 100644 index 000000000000..ee83bbabd17c --- /dev/null +++ b/samples/bpf/tcp_bufs_kern.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets and send + * and receive buffers to 1.5MB. This would usually be done after + * doing appropriate checks that indicate the hosts are far enough + * away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_bufs(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Nothing to do */ + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c new file mode 100644 index 000000000000..d68eadd9ca2d --- /dev/null +++ b/samples/bpf/tcp_clamp_kern.c @@ -0,0 +1,102 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp + * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within + * the same datacenter. For his example, we assume they are within the same + * datacenter when the first 5.5 bytes of their IPv6 addresses are the same. + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_clamp(struct bpf_sock_ops *skops) +{ + int bufsize = 150000; + int to_init = 10; + int clamp = 100; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check that both hosts are within same datacenter. For this example + * it is the case when the first 5.5 bytes of their IPv6 addresses are + * the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + rv = to_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, + &bufsize, sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_TCP, + TCP_BPF_SNDCWND_CLAMP, + &clamp, sizeof(clamp)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, + SO_RCVBUF, &bufsize, + sizeof(bufsize)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c new file mode 100644 index 000000000000..dac15bce1fa9 --- /dev/null +++ b/samples/bpf/tcp_cong_kern.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set congestion control to dctcp when both hosts are + * in the same datacenter (as deteremined by IPv6 prefix). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_cong(struct bpf_sock_ops *skops) +{ + char cong[] = "dctcp"; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check if both hosts are in the same datacenter. For this + * example they are if the 1st 5.5 bytes in the IPv6 address + * are the same. + */ + if (skops->family == AF_INET6 && + skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) { + switch (op) { + case BPF_SOCK_OPS_NEEDS_ECN: + rv = 1; + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, + cong, sizeof(cong)); + break; + default: + rv = -1; + } + } else { + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c new file mode 100644 index 000000000000..23c5122ef819 --- /dev/null +++ b/samples/bpf/tcp_iw_kern.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial congestion window and initial receive + * window to 40 packets and send and receive buffers to 1.5MB. This + * would usually be done after doing appropriate checks that indicate + * the hosts are far enough away (i.e. large RTT). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_iw(struct bpf_sock_ops *skops) +{ + int bufsize = 1500000; + int rwnd_init = 40; + int iw = 40; + int rv = 0; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Usually there would be a check to insure the hosts are far + * from each other so it makes sense to increase buffer sizes + */ + switch (op) { + case BPF_SOCK_OPS_RWND_INIT: + rv = rwnd_init; + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + /* Set sndbuf and rcvbuf of active connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, + sizeof(iw)); + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + /* Set sndbuf and rcvbuf of passive connections */ + rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize, + sizeof(bufsize)); + rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF, + &bufsize, sizeof(bufsize)); + break; + default: + rv = -1; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c new file mode 100644 index 000000000000..3f2a228f81ce --- /dev/null +++ b/samples/bpf/tcp_rwnd_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set initial receive window to 40 packets when using IPv6 + * and the first 5.5 bytes of the IPv6 addresses are not the same (in this + * example that means both hosts are not the same datacenter). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_rwnd(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != + 55601 && skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check for RWND_INIT operation and IPv6 addresses */ + if (op == BPF_SOCK_OPS_RWND_INIT && + skops->family == AF_INET6) { + + /* If the first 5.5 bytes of the IPv6 address are not the same + * then both hosts are not in the same datacenter + * so use a larger initial advertized window (40 packets) + */ + if (skops->local_ip6[0] != skops->remote_ip6[0] || + (bpf_ntohl(skops->local_ip6[1]) & 0xfffff000) != + (bpf_ntohl(skops->remote_ip6[1]) & 0xfffff000)) + rv = 40; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c new file mode 100644 index 000000000000..3c3fc83d81cb --- /dev/null +++ b/samples/bpf/tcp_synrto_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses + * and the first 5.5 bytes of the IPv6 addresses are the same (in this example + * that means both hosts are in the same datacenter). + * + * Use load_sock_ops to load this BPF program. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <linux/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define DEBUG 1 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sockops") +int bpf_synrto(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + /* For testing purposes, only execute rest of BPF program + * if neither port numberis 55601 + */ + if (bpf_ntohl(skops->remote_port) != 55601 && + skops->local_port != 55601) + return -1; + + op = (int) skops->op; + +#ifdef DEBUG + bpf_printk("BPF command: %d\n", op); +#endif + + /* Check for TIMEOUT_INIT operation and IPv6 addresses */ + if (op == BPF_SOCK_OPS_TIMEOUT_INIT && + skops->family == AF_INET6) { + + /* If the first 5.5 bytes of the IPv6 address are the same + * then both hosts are in the same datacenter + * so use an RTO of 10ms + */ + if (skops->local_ip6[0] == skops->remote_ip6[0] && + (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) == + (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) + rv = 10; + } +#ifdef DEBUG + bpf_printk("Returning %d\n", rv); +#endif + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index f62fdc2bd428..1aca18539d8d 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -32,6 +32,20 @@ static const char * const test_names[] = { #define NR_TESTS (sizeof(test_names) / sizeof(*test_names)) +static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key) +{ + struct bpf_map_info info = {}; + uint32_t info_len = sizeof(info); + int ret, id; + + ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len); + assert(!ret); + + ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id); + assert(!ret); + assert(id == info.id); +} + static void populate_map(uint32_t port_key, int magic_result) { int ret; @@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result) ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY); assert(!ret); + check_map_id(PORT_A, A_OF_PORT_A, port_key); ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST); assert(!ret); + check_map_id(PORT_A, H_OF_PORT_A, port_key); ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST); assert(!ret); + check_map_id(PORT_H, H_OF_PORT_H, port_key); } static void test_map_in_map(void) diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index ce753a408c56..c583a1e1bd3c 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -14,7 +14,15 @@ __headers: include scripts/Kbuild.include srcdir := $(srctree)/$(obj) -subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.)) + +# When make is run under a fakechroot environment, the function +# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular +# files. So, we are using a combination of sort/dir/wildcard which works +# with fakechroot. +subdirs := $(patsubst $(srcdir)/%/,%,\ + $(filter-out $(srcdir)/,\ + $(sort $(dir $(wildcard $(srcdir)/*/))))) + # caller may set destination dir (when installing to asm/) _dst := $(if $(dst),$(dst),$(obj)) diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h index 3bffdcaaa274..b724a0290c75 100644 --- a/scripts/genksyms/genksyms.h +++ b/scripts/genksyms/genksyms.h @@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start, int yylex(void); int yyparse(void); -void error_with_pos(const char *, ...); +void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2))); /*----------------------------------------------------------------------*/ #define xmalloc(size) ({ void *__ptr = malloc(size); \ diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 90a091b6ae4d..eb8144643b78 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -196,7 +196,7 @@ clean-files += config.pot linux.pot # Check that we have the required ncurses stuff installed for lxdialog (menuconfig) PHONY += $(obj)/dochecklxdialog -$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog +$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog $(obj)/dochecklxdialog: $(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf) diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index a9bc5334a478..003114779815 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS]; static int items_num; static int global_exit; /* the currently selected button */ -const char *current_instructions = menu_instructions; +static const char *current_instructions = menu_instructions; static char *dialog_input_result; static int dialog_input_result_len; @@ -305,7 +305,7 @@ struct function_keys { }; static const int function_keys_num = 9; -struct function_keys function_keys[] = { +static struct function_keys function_keys[] = { { .key_str = "F1", .func = "Help", @@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag) index = (index + items_num) % items_num; while (true) { char *str = k_menu_items[index].str; - if (strcasestr(str, match_str) != 0) + if (strcasestr(str, match_str) != NULL) return index; if (flag == FIND_NEXT_MATCH_UP || flag == MATCH_TINKER_PATTERN_UP) @@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans) static void conf(struct menu *menu) { - struct menu *submenu = 0; + struct menu *submenu = NULL; const char *prompt = menu_get_prompt(menu); struct symbol *sym; int res; @@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu) static void conf_choice(struct menu *menu) { const char *prompt = _(menu_get_prompt(menu)); - struct menu *child = 0; + struct menu *child = NULL; struct symbol *active; int selected_index = 0; int last_top_row = 0; @@ -1456,7 +1456,7 @@ static void conf_save(void) } } -void setup_windows(void) +static void setup_windows(void) { int lines, columns; diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 4b2f44c20caf..a64b1c31253e 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -129,7 +129,7 @@ static void no_colors_theme(void) mkattrn(FUNCTION_TEXT, A_REVERSE); } -void set_colors() +void set_colors(void) { start_color(); use_default_colors(); @@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no) int lines = 0; if (!text) - return 0; + return NULL; for (i = 0; text[i] != '\0' && lines < line_no; i++) if (text[i] == '\n') diff --git a/scripts/tags.sh b/scripts/tags.sh index d661f2f3ef61..d23dcbf17457 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -106,6 +106,7 @@ all_compiled_sources() case "$i" in *.[cS]) j=${i/\.[cS]/\.o} + j="${j#$tree}" if [ -e $j ]; then echo $i fi diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 5088d4b8db22..009e6c98754e 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol, struct snd_pcm_substream *substream; const struct snd_pcm_chmap_elem *map; - if (snd_BUG_ON(!info->chmap)) + if (!info->chmap) return -EINVAL; substream = snd_pcm_chmap_substream(info, idx); if (!substream) @@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int __user *dst; int c, count = 0; - if (snd_BUG_ON(!info->chmap)) + if (!info->chmap) return -EINVAL; if (size < 8) return -ENOMEM; diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 9e6f54f8c45d..1e26854b3425 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp, cycle = increment_cycle_count(cycle, 1); if (s->handle_packet(s, 0, cycle, i) < 0) { s->packet_index = -1; - amdtp_stream_pcm_abort(s); + if (in_interrupt()) + amdtp_stream_pcm_abort(s); + WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN); return; } } @@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp, /* Queueing error or detecting invalid payload. */ if (i < packets) { s->packet_index = -1; - amdtp_stream_pcm_abort(s); + if (in_interrupt()) + amdtp_stream_pcm_abort(s); + WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN); return; } diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index 7e8831722821..ea1a91e99875 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -135,7 +135,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; struct tasklet_struct period_tasklet; - unsigned int pcm_buffer_pointer; + snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; /* To wait for first packet. */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 1770f085c2a6..01eb1dc7b5b3 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -370,10 +370,12 @@ enum { #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98) #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198) -#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ - IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \ - IS_GLK(pci) +#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \ + IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \ + IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci)) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = { /* Kabylake-H */ { PCI_DEVICE(0x8086, 0xa2f0), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Coffelake */ + { PCI_DEVICE(0x8086, 0xa348), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE}, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f94b48b168dc..ce2988be4f0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -120,12 +120,14 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, }; enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -518,6 +520,25 @@ union bpf_attr { * Set full skb->hash. * @skb: pointer to skb * @hash: hash to set + * + * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) + * Calls setsockopt. Not all opts are available, only those with + * integer optvals plus TCP_CONGESTION. + * Supported levels: SOL_SOCKET and IPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: SOL_SOCKET or IPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in byes + * Return: 0 or negative error + * + * int bpf_skb_adjust_room(skb, len_diff, mode, flags) + * Grow or shrink room in sk_buff. + * @skb: pointer to skb + * @len_diff: (signed) amount of room to grow/shrink + * @mode: operation mode (enum bpf_adj_room_mode) + * @flags: reserved for future use + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -568,7 +589,9 @@ union bpf_attr { FN(probe_read_str), \ FN(get_socket_cookie), \ FN(get_socket_uid), \ - FN(set_hash), + FN(set_hash), \ + FN(setsockopt), \ + FN(skb_adjust_room), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -618,6 +641,11 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET_OPTS, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -720,4 +748,54 @@ struct bpf_map_info { __u32 map_flags; } __attribute__((aligned(8))); +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 reply; + __u32 replylong[4]; + }; + __u32 family; + __u32 remote_ip4; + __u32 local_ip4; + __u32 remote_ip6[4]; + __u32 local_ip6[4]; + __u32 remote_port; + __u32 local_port; +}; + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ +}; + +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 84e7e698411e..a2670e9d652d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp, struct map *map, unsigned long offs) { struct symbol *sym; - u64 addr = tp->address + tp->offset - offs; + u64 addr = tp->address - offs; sym = map__find_symbol(map, addr); if (!sym) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c0af0195432f..404aec520812 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2658,6 +2658,171 @@ static struct bpf_test tests[] = { .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { + "direct packet access: test18 (imm += pkt_ptr, 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test19 (imm += pkt_ptr, 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test20 (x += pkt_ptr, 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "direct packet access: test21 (x += pkt_ptr, 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "direct packet access: test22 (x += pkt_ptr, 3)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "direct packet access: test23 (x += pkt_ptr, 4)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet", + }, + { + "direct packet access: test24 (x += pkt_ptr, 5)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 64), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -3767,6 +3932,72 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", }, { + "leak pointer into ctx 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 2 }, + .errstr_unpriv = "R2 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into ctx 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R10 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into ctx 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 1 }, + .errstr_unpriv = "R2 leaks addr into ctx", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "leak pointer into map val", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr_unpriv = "R6 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { "helper access to map: full range", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |