585 files changed, 17293 insertions, 4689 deletions
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
index 42c3bb2d53e8..01e331a5f3e7 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
@@ -41,9 +41,9 @@ Required properties:
 Optional properties:
 
 In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
 
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
 
 - reg: an additional register set is needed, for the GPIO Blink
   Counter on/off registers.
@@ -71,7 +71,7 @@ Example:
 		};
 
 		gpio1: gpio@18140 {
-			compatible = "marvell,armada-370-xp-gpio";
+			compatible = "marvell,armada-370-gpio";
 			reg = <0x18140 0x40>, <0x181c8 0x08>;
 			reg-names = "gpio", "pwm";
 			ngpios = <17>;
diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
index bbd083f5600a..1db6e0057a63 100644
--- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt
+++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
@@ -31,7 +31,7 @@ Example:
 		compatible = "st,stm32-timers";
 		reg = <0x40010000 0x400>;
 		clocks = <&rcc 0 160>;
-		clock-names = "clk_int";
+		clock-names = "int";
 
 		pwm {
 			compatible = "st,stm32-pwm";
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 1506e948610c..27966ae741e0 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -22,6 +22,7 @@ Required properties:
 	Required elements: 'pclk', 'hclk'
 	Optional elements: 'tx_clk'
 	Optional elements: 'rx_clk' applies to cdns,zynqmp-gem
+	Optional elements: 'tsu_clk'
 - clocks: Phandles to input clocks.
 
 Optional properties for PHY child node:
diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
index c627bbb3009e..60c833d62181 100644
--- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
@@ -13,14 +13,10 @@ Optional SoC Specific Properties:
 - pinctrl-names: Contains only one value - "default".
 - pintctrl-0: Specifies the pin control groups used for this controller.
 - autosuspend-delay: Specify autosuspend delay in milliseconds.
-- vin-voltage-override: Specify voltage of VIN pin in microvolts.
 - irq-status-read-quirk: Specify that the trf7970a being used has the
   "IRQ Status Read" erratum.
 - en2-rf-quirk: Specify that the trf7970a being used has the "EN2 RF"
   erratum.
-- t5t-rmb-extra-byte-quirk: Specify that the trf7970a has the erratum
-  where an extra byte is returned by Read Multiple Block commands issued
-  to Type 5 tags.
 - vdd-io-supply: Regulator specifying voltage for vdd-io
 - clock-frequency: Set to specify that the input frequency to the trf7970a is 13560000Hz or 27120000Hz
 
@@ -37,15 +33,13 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1):
 		spi-max-frequency = <2000000>;
 		interrupt-parent = <&gpio2>;
 		interrupts = <14 0>;
-		ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>,
-				  <&gpio2 5 GPIO_ACTIVE_LOW>;
+		ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>,
+				  <&gpio2 5 GPIO_ACTIVE_HIGH>;
 		vin-supply = <&ldo3_reg>;
-		vin-voltage-override = <5000000>;
 		vdd-io-supply = <&ldo2_reg>;
 		autosuspend-delay = <30000>;
 		irq-status-read-quirk;
 		en2-rf-quirk;
-		t5t-rmb-extra-byte-quirk;
 		clock-frequency = <27120000>;
 		status = "okay";
 	};
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index 24196cef7c91..1fe42a874aae 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -22,9 +22,9 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
 	There are no module parameters for this driver and it can be configured
 using IProute2/ip utility.
 
-	ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
+	ip link add link <master-dev> name <slave-dev> type ipvlan mode { l2 | l3 | l3s }
 
-	e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
+	e.g. ip link add link eth0 name ipvl0 type ipvlan mode l2
 
 
 4. Operating modes:
diff --git a/Documentation/networking/policy-routing.txt b/Documentation/networking/policy-routing.txt
deleted file mode 100644
index 36f6936d7f21..000000000000
--- a/Documentation/networking/policy-routing.txt
+++ /dev/null
@@ -1,150 +0,0 @@
-Classes
--------
-
-	"Class" is a complete routing table in common sense.
-	I.e. it is tree of nodes (destination prefix, tos, metric)
-	with attached information: gateway, device etc.
-	This tree is looked up as specified in RFC1812 5.2.4.3
-	1. Basic match
-	2. Longest match
-	3. Weak TOS.
-	4. Metric. (should not be in kernel space, but they are)
-	5. Additional pruning rules. (not in kernel space).
-	
-	We have two special type of nodes:
-	REJECT - abort route lookup and return an error value.
-	THROW  - abort route lookup in this class.
-
-
-	Currently the number of classes is limited to 255
-	(0 is reserved for "not specified class")
-
-	Three classes are builtin:
-
-	RT_CLASS_LOCAL=255 - local interface addresses,
-	broadcasts, nat addresses.
-
-	RT_CLASS_MAIN=254  - all normal routes are put there
-	by default.
-
-	RT_CLASS_DEFAULT=253 - if ip_fib_model==1, then
-	normal default routes are put there, if ip_fib_model==2
-	all gateway routes are put there.
-
-
-Rules
------
-	Rule is a record of (src prefix, src interface, tos, dst prefix)
-	with attached information.
-
-	Rule types:
-	RTP_ROUTE - lookup in attached class
-	RTP_NAT   - lookup in attached class and if a match is found,
-		    translate packet source address.
-	RTP_MASQUERADE - lookup in attached class and if a match is found,
-		    masquerade packet as sourced by us.
-	RTP_DROP   - silently drop the packet.
-	RTP_REJECT - drop the packet and send ICMP NET UNREACHABLE.
-	RTP_PROHIBIT - drop the packet and send ICMP COMM. ADM. PROHIBITED.
-
-	Rule flags:
-	RTRF_LOG - log route creations.
-	RTRF_VALVE - One way route (used with masquerading)
-
-Default setup:
-
-root@amber:/pub/ip-routing # iproute -r
-Kernel routing policy rules
-Pref Source             Destination        TOS Iface   Cl
-   0 default            default            00  *       255
- 254 default            default            00  *       254
- 255 default            default            00  *       253
-
-
-Lookup algorithm
-----------------
-
-	We scan rules list, and if a rule is matched, apply it.
-	If a route is found, return it.
-	If it is not found or a THROW node was matched, continue
-	to scan rules.
-
-Applications
-------------
-
-1.	Just ignore classes. All the routes are put into MAIN class
-	(and/or into DEFAULT class).
-
-	HOWTO:  iproute add PREFIX [ tos TOS ] [ gw GW ] [ dev DEV ]
-		[ metric METRIC ] [ reject ] ... (look at iproute utility)
-
-		or use route utility from current net-tools.
-		
-2.	Opposite case. Just forget all that you know about routing
-	tables. Every rule is supplied with its own gateway, device
-	info. record. This approach is not appropriate for automated
-	route maintenance, but it is ideal for manual configuration.
-
-	HOWTO:  iproute addrule [ from PREFIX ] [ to PREFIX ] [ tos TOS ]
-		[ dev INPUTDEV] [ pref PREFERENCE ] route [ gw GATEWAY ]
-		[ dev OUTDEV ] .....
-
-	Warning: As of now the size of the routing table in this
-	approach is limited to 256. If someone likes this model, I'll
-	relax this limitation.
-
-3.	OSPF classes (see RFC1583, RFC1812 E.3.3)
-	Very clean, stable and robust algorithm for OSPF routing
-	domains. Unfortunately, it is not widely used in the Internet.
-
-	Proposed setup:
-	255 local addresses
-	254 interface routes
-	253 ASE routes with external metric
-	252 ASE routes with internal metric
-	251 inter-area routes
-	250 intra-area routes for 1st area
-	249 intra-area routes for 2nd area
-	etc.
-	
-	Rules:
-	iproute addrule class 253
-	iproute addrule class 252
-	iproute addrule class 251
-	iproute addrule to a-prefix-for-1st-area class 250
-	iproute addrule to another-prefix-for-1st-area class 250
-	...
-	iproute addrule to a-prefix-for-2nd-area class 249
-	...
-
-	Area classes must be terminated with reject record.
-	iproute add default reject class 250
-	iproute add default reject class 249
-	...
-
-4.	The Variant Router Requirements Algorithm (RFC1812 E.3.2)
-	Create 16 classes for different TOS values.
-	It is a funny, but pretty useless algorithm.
-	I listed it just to show the power of new routing code.
-
-5.	All the variety of combinations......
-
-
-GATED
------
-
-	Gated does not understand classes, but it will work
-	happily in MAIN+DEFAULT. All policy routes can be set
-	and maintained manually.
-
-IMPORTANT NOTE
---------------
-	route.c has a compilation time switch CONFIG_IP_LOCAL_RT_POLICY.
-	If it is set, locally originated packets are routed
-	using all the policy list. This is not very convenient and
-	pretty ambiguous when used with NAT and masquerading.
-	I set it to FALSE by default.
-
-
-Alexey Kuznetov
-[email protected]
diff --git a/MAINTAINERS b/MAINTAINERS
index f81e1b765353..5bebe20811c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2967,7 +2967,7 @@ F:	sound/pci/oxygen/
 
 C6X ARCHITECTURE
 M:	Mark Salter <[email protected]>
-M:	Aurelien Jacquiot <[email protected]>
+M:	Aurelien Jacquiot <[email protected]>
 L:	[email protected]
 W:	http://www.linux-c6x.org/wiki/index.php/Main_Page
 S:	Maintained
@@ -8330,6 +8330,16 @@ Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 F:	include/linux/mlx5/mlx5_ifc_fpga.h
 
+MELLANOX ETHERNET INNOVA IPSEC DRIVER
+M:	Ilan Tayari <[email protected]>
+R:	Boris Pismenny <[email protected]>
+L:	[email protected]
+S:	Supported
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+F:	drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
+F:	drivers/net/ethernet/mellanox/mlx5/core/ipsec*
+
 MELLANOX ETHERNET SWITCH DRIVERS
 M:	Jiri Pirko <[email protected]>
 M:	Ido Schimmel <[email protected]>
@@ -9072,9 +9082,6 @@ F:	include/uapi/linux/nfc.h
 F:	drivers/nfc/
 F:	include/linux/platform_data/nfcmrvl.h
 F:	include/linux/platform_data/nxp-nci.h
-F:	include/linux/platform_data/pn544.h
-F:	include/linux/platform_data/st21nfca.h
-F:	include/linux/platform_data/st-nci.h
 F:	Documentation/devicetree/bindings/net/nfc/
 
 NFS, SUNRPC, AND LOCKD CLIENTS
@@ -11409,6 +11416,14 @@ F:	kernel/time/alarmtimer.c
 F:	kernel/time/ntp.c
 F:	tools/testing/selftests/timers/
 
+TI TRF7970A NFC DRIVER
+M:	Mark Greer <[email protected]>
+L:	[email protected]
+L:	[email protected] (moderated for non-subscribers)
+S:	Supported
+F:	drivers/nfc/trf7970a.c
+F:	Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+
 SC1200 WDT DRIVER
 M:	Zwane Mwaikambo <[email protected]>
 S:	Maintained
diff --git a/Makefile b/Makefile
index e40c471abe29..6d8a984ed9c9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -1437,7 +1437,7 @@ help:
 	@echo  '  make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
 	@echo  '  make V=2   [targets] 2 => give reason for rebuild of target'
 	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
-	@echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
+	@echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK (sparse by default)'
 	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
 	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
 	@echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 6e1242da0159..4104a0839214 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
 #define TSK_K_BLINK(tsk)	TSK_K_REG(tsk, 4)
 #define TSK_K_FP(tsk)		TSK_K_REG(tsk, 0)
 
-#define thread_saved_pc(tsk)	TSK_K_BLINK(tsk)
-
 extern void start_thread(struct pt_regs * regs, unsigned long pc,
 			 unsigned long usp);
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f15838..c0fcab6a5504 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1416,6 +1416,7 @@ choice
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
 	config VMSPLIT_3G_OPT
+		depends on !ARM_LPAE
 		bool "3G/1G user/kernel split (for full 1G low memory)"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 3f7d1b74c5e0..a17ca8d78656 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,7 +17,8 @@
 		@ there.
 		.inst	'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
 #else
-		W(mov)	r0, r0
+ AR_CLASS(	mov	r0, r0		)
+  M_CLASS(	nop.w			)
 #endif
 		.endm
 
diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
index dd3525a0f06a..9e8b082c134f 100644
--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
@@ -57,7 +57,6 @@
 	aliases {
 		serial0 = &uart0;
 		/* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
-		ethernet0 = &emac;
 		ethernet1 = &xr819;
 	};
 
@@ -104,13 +103,6 @@
 	status = "okay";
 };
 
-&emac {
-	phy-handle = <&int_mii_phy>;
-	phy-mode = "mii";
-	allwinner,leds-active-low;
-	status = "okay";
-};
-
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
index 78f6c24952dd..8d2cc6e9a03f 100644
--- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
+++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
@@ -46,10 +46,3 @@
 	model = "FriendlyARM NanoPi NEO";
 	compatible = "friendlyarm,nanopi-neo", "allwinner,sun8i-h3";
 };
-
-&emac {
-	phy-handle = <&int_mii_phy>;
-	phy-mode = "mii";
-	allwinner,leds-active-low;
-	status = "okay";
-};
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
index cedd326b6089..5b6d14555b7c 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
@@ -54,7 +54,6 @@
 	aliases {
 		serial0 = &uart0;
 		/* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
-		ethernet0 = &emac;
 		ethernet1 = &rtl8189;
 	};
 
@@ -109,13 +108,6 @@
 	status = "okay";
 };
 
-&emac {
-	phy-handle = <&int_mii_phy>;
-	phy-mode = "mii";
-	allwinner,leds-active-low;
-	status = "okay";
-};
-
 &ir {
 	pinctrl-names = "default";
 	pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
index 6880268e8b87..5fea430e0eb1 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -52,7 +52,6 @@
 	compatible = "xunlong,orangepi-one", "allwinner,sun8i-h3";
 
 	aliases {
-		ethernet0 = &emac;
 		serial0 = &uart0;
 	};
 
@@ -98,13 +97,6 @@
 	status = "okay";
 };
 
-&emac {
-	phy-handle = <&int_mii_phy>;
-	phy-mode = "mii";
-	allwinner,leds-active-low;
-	status = "okay";
-};
-
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
index a10281b455f5..8b93f5c781a7 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
@@ -53,11 +53,6 @@
 	};
 };
 
-&emac {
-	/* LEDs changed to active high on the plus */
-	/delete-property/ allwinner,leds-active-low;
-};
-
 &mmc1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc1_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
index 52e65755c51a..f148111c326d 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -52,7 +52,6 @@
 	compatible = "xunlong,orangepi-pc", "allwinner,sun8i-h3";
 
 	aliases {
-		ethernet0 = &emac;
 		serial0 = &uart0;
 	};
 
@@ -110,13 +109,6 @@
 	status = "okay";
 };
 
-&emac {
-	phy-handle = <&int_mii_phy>;
-	phy-mode = "mii";
-	allwinner,leds-active-low;
-	status = "okay";
-};
-
 &ir {
 	pinctrl-names = "default";
 	pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index a6d4fda544e1..d4f600dbb7eb 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -83,12 +83,6 @@
 		#size-cells = <1>;
 		ranges;
 
-		syscon: syscon@1c00000 {
-			compatible = "allwinner,sun8i-h3-system-controller",
-				"syscon";
-			reg = <0x01c00000 0x1000>;
-		};
-
 		dma: dma-controller@01c02000 {
 			compatible = "allwinner,sun8i-h3-dma";
 			reg = <0x01c02000 0x1000>;
@@ -285,14 +279,6 @@
 			interrupt-controller;
 			#interrupt-cells = <3>;
 
-			emac_rgmii_pins: emac0 {
-				pins = "PD0", "PD1", "PD2", "PD3", "PD4",
-				       "PD5", "PD7", "PD8", "PD9", "PD10",
-				       "PD12", "PD13", "PD15", "PD16", "PD17";
-				function = "emac";
-				drive-strength = <40>;
-			};
-
 			i2c0_pins: i2c0 {
 				pins = "PA11", "PA12";
 				function = "i2c0";
@@ -389,32 +375,6 @@
 			clocks = <&osc24M>;
 		};
 
-		emac: ethernet@1c30000 {
-			compatible = "allwinner,sun8i-h3-emac";
-			syscon = <&syscon>;
-			reg = <0x01c30000 0x104>;
-			interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-			interrupt-names = "macirq";
-			resets = <&ccu RST_BUS_EMAC>;
-			reset-names = "stmmaceth";
-			clocks = <&ccu CLK_BUS_EMAC>;
-			clock-names = "stmmaceth";
-			#address-cells = <1>;
-			#size-cells = <0>;
-			status = "disabled";
-
-			mdio: mdio {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				int_mii_phy: ethernet-phy@1 {
-					compatible = "ethernet-phy-ieee802.3-c22";
-					reg = <1>;
-					clocks = <&ccu CLK_BUS_EPHY>;
-					resets = <&ccu RST_BUS_EPHY>;
-				};
-			};
-		};
-
 		spi0: spi@01c68000 {
 			compatible = "allwinner,sun8i-h3-spi";
 			reg = <0x01c68000 0x1000>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 6da6af8881f7..2685e03600b1 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -257,7 +257,6 @@ CONFIG_SMSC911X=y
 CONFIG_STMMAC_ETH=y
 CONFIG_STMMAC_PLATFORM=y
 CONFIG_DWMAC_DWC_QOS_ETH=y
-CONFIG_DWMAC_SUN8I=y
 CONFIG_TI_CPSW=y
 CONFIG_XILINX_EMACLITE=y
 CONFIG_AT803X_PHY=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 504e02238031..5cd5dd70bc83 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -40,7 +40,6 @@ CONFIG_ATA=y
 CONFIG_AHCI_SUNXI=y
 CONFIG_NETDEVICES=y
 CONFIG_SUN4I_EMAC=y
-CONFIG_DWMAC_SUN8I=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 32e1a9513dc7..4e80bf7420d4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
 	if (arch >= CPU_ARCH_ARMv6) {
 		unsigned int cachetype = read_cpuid_cachetype();
 
-		if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+		if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
 			cacheid = 0;
 		} else if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 0d1f026d831a..6872135d7f84 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -67,14 +67,6 @@
 	};
 };
 
-&emac {
-	pinctrl-names = "default";
-	pinctrl-0 = <&rgmii_pins>;
-	phy-mode = "rgmii";
-	phy-handle = <&ext_rgmii_phy>;
-	status = "okay";
-};
-
 &i2c1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c1_pins>;
@@ -85,13 +77,6 @@
 	bias-pull-up;
 };
 
-&mdio {
-	ext_rgmii_phy: ethernet-phy@1 {
-		compatible = "ethernet-phy-ieee802.3-c22";
-		reg = <1>;
-	};
-};
-
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
index 24f1aac366d6..790d14daaa6a 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
@@ -46,20 +46,5 @@
 	model = "Pine64+";
 	compatible = "pine64,pine64-plus", "allwinner,sun50i-a64";
 
-	/* TODO: Camera, touchscreen, etc. */
-};
-
-&emac {
-	pinctrl-names = "default";
-	pinctrl-0 = <&rgmii_pins>;
-	phy-mode = "rgmii";
-	phy-handle = <&ext_rgmii_phy>;
-	status = "okay";
-};
-
-&mdio {
-	ext_rgmii_phy: ethernet-phy@1 {
-		compatible = "ethernet-phy-ieee802.3-c22";
-		reg = <1>;
-	};
+	/* TODO: Camera, Ethernet PHY, touchscreen, etc. */
 };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
index 3b491c0e3b0d..c680ed385da3 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -70,15 +70,6 @@
 	status = "okay";
 };
 
-&emac {
-	pinctrl-names = "default";
-	pinctrl-0 = <&rmii_pins>;
-	phy-mode = "rmii";
-	phy-handle = <&ext_rmii_phy1>;
-	status = "okay";
-
-};
-
 &i2c1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c1_pins>;
@@ -89,13 +80,6 @@
 	bias-pull-up;
 };
 
-&mdio {
-	ext_rmii_phy1: ethernet-phy@1 {
-		compatible = "ethernet-phy-ieee802.3-c22";
-		reg = <1>;
-	};
-};
-
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 769ced01a998..166c9ef884dc 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -129,12 +129,6 @@
 		#size-cells = <1>;
 		ranges;
 
-		syscon: syscon@1c00000 {
-			compatible = "allwinner,sun50i-a64-system-controller",
-				"syscon";
-			reg = <0x01c00000 0x1000>;
-		};
-
 		mmc0: mmc@1c0f000 {
 			compatible = "allwinner,sun50i-a64-mmc";
 			reg = <0x01c0f000 0x1000>;
@@ -287,21 +281,6 @@
 				bias-pull-up;
 			};
 
-			rmii_pins: rmii_pins {
-				pins = "PD10", "PD11", "PD13", "PD14", "PD17",
-				       "PD18", "PD19", "PD20", "PD22", "PD23";
-				function = "emac";
-				drive-strength = <40>;
-			};
-
-			rgmii_pins: rgmii_pins {
-				pins = "PD8", "PD9", "PD10", "PD11", "PD12",
-				       "PD13", "PD15", "PD16", "PD17", "PD18",
-				       "PD19", "PD20", "PD21", "PD22", "PD23";
-				function = "emac";
-				drive-strength = <40>;
-			};
-
 			uart0_pins_a: uart0@0 {
 				pins = "PB8", "PB9";
 				function = "uart0";
@@ -406,26 +385,6 @@
 			#size-cells = <0>;
 		};
 
-		emac: ethernet@1c30000 {
-			compatible = "allwinner,sun50i-a64-emac";
-			syscon = <&syscon>;
-			reg = <0x01c30000 0x100>;
-			interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-			interrupt-names = "macirq";
-			resets = <&ccu RST_BUS_EMAC>;
-			reset-names = "stmmaceth";
-			clocks = <&ccu CLK_BUS_EMAC>;
-			clock-names = "stmmaceth";
-			status = "disabled";
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			mdio: mdio {
-				#address-cells = <1>;
-				#size-cells = <0>;
-			};
-		};
-
 		gic: interrupt-controller@1c81000 {
 			compatible = "arm,gic-400";
 			reg = <0x01c81000 0x1000>,
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d789858c4f1b..97c123e09e45 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -191,7 +191,6 @@ CONFIG_RAVB=y
 CONFIG_SMC91X=y
 CONFIG_SMSC911X=y
 CONFIG_STMMAC_ETH=m
-CONFIG_DWMAC_SUN8I=m
 CONFIG_MDIO_BUS_MUX_MMIOREG=y
 CONFIG_MESON_GXL_PHY=m
 CONFIG_MICREL_PHY=y
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31f8f55..d0cb007fa482 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->raw_time_sec		= tk->raw_time.tv_sec;
-		vdso_data->raw_time_nsec	= tk->raw_time.tv_nsec;
+		vdso_data->raw_time_nsec	= (tk->raw_time.tv_nsec <<
+						   tk->tkr_raw.shift) +
+						  tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
 		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
-		/* tkr_raw.xtime_nsec == 0 */
 		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
 		vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
 		/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..76320e920965 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
 	seqcnt_check fail=monotonic_raw
 
 	/* All computations are done with left-shifted nsecs. */
-	lsl	x14, x14, x12
 	get_nsec_per_sec res=x9
 	lsl	x9, x9, x12
 
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 85d4af97c986..dbdbb8a558df 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk)	(tsk->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)							\
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index b9eb3da7f278..7c87b5be53b5 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task)
 #define release_segments(mm)		do { } while (0)
 
 /*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
  * saved kernel SP and DP of a blocked thread.
  */
 #ifdef _BIG_ENDIAN
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index e299d30105b5..a2cdb1521aca 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
 	while(1) /* waiting for RETRIBUTION! */ ;
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->irp;
-}
-
 /* setup the child's kernel stack with a pt_regs and switch_stack on it.
  * it will be un-nested during _resume and _ret_from_sys_call when the
  * new thread is scheduled.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index c530a8fa87ce..fe87b383fbf3 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -85,14 +85,6 @@ hard_reset_now(void)
 }
 
 /*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->erp;
-}
-
-/*
  * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
  * It will be unnested during _resume and _ret_from_sys_call when the new thread
  * is scheduled.
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 15b815df29c1..bc2729e4b2c9 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /* Free all resources held by a thread. */
 static inline void release_thread(struct task_struct *dead_task)
 {
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index ddaeb9cc9143..e4d08d74ed9f 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
 #define release_segments(mm)		do { } while (0)
 #define forget_segments()		do { } while (0)
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5a4c92abc99e..a957b374e3a6 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(tsk->thread.pc))
-		return ((unsigned long *)tsk->thread.fp)[2];
-	else
-		return tsk->thread.pc;
-}
-
 int elf_check_arch(const struct elf32_hdr *hdr)
 {
 	unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 65132d7ae9e5..afa53147e66a 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0f5db5bb561b..d1ddcabbbe83 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 45a825402f63..ce67940860a5 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
 /*  task_struct, defined elsewhere, is the "process descriptor" */
 struct task_struct;
 
-/*  this is defined in arch/process.c  */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /*
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index de715bab7956..656050c2e6a0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -61,14 +61,6 @@ void arch_cpu_idle(void)
 }
 
 /*
- *  Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return 0;
-}
-
-/*
  * Copy architecture-specific thread state
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 26a63d69c599..ab982f07ea68 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
-	struct unw_frame_info info;
-	unsigned long ip;
-
-	unw_init_from_blocked_task(&info, t);
-	if (unw_unwind(&info) < 0)
-		return 0;
-	unw_get_ip(&info, &ip);
-	return ip;
-}
-
-/*
  * Get the current instruction/program counter value.
  */
 #define current_text_addr() \
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 5767367550c6..657874eeeccc 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
 extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
 extern void release_segments(struct mm_struct * mm);
 
-extern unsigned long thread_saved_pc(struct task_struct *);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)  do { } while (0)
 #define release_segments(mm)  do { } while (0)
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index d8ffcfec599c..8cd7e03f4370 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
 
 #include <linux/err.h>
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return tsk->thread.lr;
-}
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 77239e81379b..94c36030440c 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e475c945c8b2..7df92f8b0781 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(sw->retpc))
-		return ((unsigned long *)sw->a6)[1];
-	else
-		return sw->retpc;
-}
-
 void arch_cpu_idle(void)
 {
 #if defined(MACH_ATARI_ONLY)
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 37ef196e4519..330d556860ba 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 # define KSTK_EIP(tsk)	(0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved (kernel) PC of a blocked thread.  */
-#  define thread_saved_pc(tsk)	\
-	((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index e92a817e645f..6527ec22f158 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct cpu_context *ctx =
-		&(((struct thread_info *)(tsk->stack))->cpu_context);
-
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(ctx->r15))
-		return (unsigned long)ctx->r15;
-	else
-		return ctx->r14;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336dd2638..7cd92166a0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
 			  bool user, bool kernel)
 {
-	int idx_user, idx_kernel;
+	/*
+	 * Initialize idx_user and idx_kernel to workaround bogus
+	 * maybe-initialized warning when using GCC 6.
+	 */
+	int idx_user = 0, idx_kernel = 0;
 	unsigned long flags, old_entryhi;
 
 	local_irq_save(flags);
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 18e17abf7664..3ae479117b42 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index c9fa42619c6a..89e8027e07fb 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -40,14 +40,6 @@
 #include "internal.h"
 
 /*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
  * power off function, if any
  */
 void (*pm_power_off)(void);
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 3bbbc3d798e5..4944e2e1d8b0 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk)	((tsk)->thread.kregs->ea)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index a908e6c30a00..396d8f306c21 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 106859ae27ff..f9b77003f113 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
 	show_registers(regs);
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return (unsigned long)user_regs(t->stack)->pc;
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 }
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index a3661ee6b060..4c6694b4e77e 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -163,12 +163,7 @@ struct thread_struct {
 	.flags		= 0 \
 	}
 
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
 void show_trace(struct task_struct *task, unsigned long *stack);
 
 /*
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4516a5b53f38..b64d7d21646e 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return t->thread.regs.kpc;
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bb99b651085a..1189d04f3bd1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -378,12 +378,6 @@ struct thread_struct {
 }
 #endif
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk)    \
-        ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.regs)
 
 unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.  r0,r4,DSISR_DABRMATCH@h
-	bne-    handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld      r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.  r0,r4,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return  (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
 #endif
 
 /*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
+/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr   r8
@@ -95,18 +99,44 @@ ftrace_call:
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
 #endif
 
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
 	addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4fd57d9..8d1a365b8edc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+		/*
+		 * POWER9 DD1 has an erratum where writing TBU40 causes
+		 * the timebase to lose ticks.  So we don't let the
+		 * timebase offset be changed on P9 DD1.  (It is
+		 * initialized to zero.)
+		 */
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			break;
 		/* round up to multiple of 2^24 */
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 	int srcu_idx;
+	unsigned long ebb_regs[3] = {};	/* shut up GCC */
+	unsigned long user_tar = 0;
+	unsigned int user_vrsave;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
 	}
 
+	/*
+	 * Don't allow entry with a suspended transaction, because
+	 * the guest entry/exit code will lose it.
+	 * If the guest has TM enabled, save away their TM-related SPRs
+	 * (they will get restored by the TM unavailable interrupt).
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+	    (current->thread.regs->msr & MSR_TM)) {
+		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			return -EINVAL;
+		}
+		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+		current->thread.regs->msr &= ~MSR_TM;
+	}
+#endif
+
 	kvmppc_core_prepare_to_enter(vcpu);
 
 	/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	flush_all_to_thread(current);
 
+	/* Save userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		ebb_regs[0] = mfspr(SPRN_EBBHR);
+		ebb_regs[1] = mfspr(SPRN_EBBRR);
+		ebb_regs[2] = mfspr(SPRN_BESCR);
+		user_tar = mfspr(SPRN_TAR);
+	}
+	user_vrsave = mfspr(SPRN_VRSAVE);
+
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	} while (is_kvmppc_resume_guest(r));
 
+	/* Restore userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_EBBHR, ebb_regs[0]);
+		mtspr(SPRN_EBBRR, ebb_regs[1]);
+		mtspr(SPRN_BESCR, ebb_regs[2]);
+		mtspr(SPRN_TAR, user_tar);
+		mtspr(SPRN_FSCR, current->thread.fscr);
+	}
+	mtspr(SPRN_VRSAVE, user_vrsave);
+
  out:
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&vcpu->kvm->arch.vcpus_running);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..404deb512844 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
+BEGIN_FTR_SECTION
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_KVM(r5)
+	ld	r9, KVM_HOST_LPCR(r6)
+	andis.	r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-	mtspr	SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+	bne	32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
+32:	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..4888dd494604 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,29 @@
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
 
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)			\
+BEGIN_FTR_SECTION;				\
+	extsw	reg, reg;			\
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
 
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			144
+#define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_TID		(SFS-16)
+#define STACK_SLOT_PSSCR	(SFS-24)
+#define STACK_SLOT_PID		(SFS-32)
+#define STACK_SLOT_IAMR		(SFS-40)
+#define STACK_SLOT_CIABR	(SFS-48)
+#define STACK_SLOT_DAWR		(SFS-56)
+#define STACK_SLOT_DAWRX	(SFS-64)
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
+	/* HDEC value came from DEC in the first place, it will fit */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
 
 	/* See if our timeslice has expired (HDEC is negative) */
 	mfspr	r0, SPRN_HDEC
+	EXTEND_HDEC(r0)
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@ kvm_novcpu_exit:
 	bl	kvmhv_accumulate_time
 #endif
 13:	mr	r3, r12
-	stw	r12, 112-4(r1)
+	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_commence_exit
 	nop
-	lwz	r12, 112-4(r1)
+	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	kvmhv_switch_to_host
 
 /*
@@ -390,8 +410,8 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+	LOAD_REG_ADDR(r6, decrementer_max)
+	ld	r6, 0(r6)
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *                                                                            *
  *****************************************************************************/
 
-/* Stack frame offsets */
-#define STACK_SLOT_TID		(112-16)
-#define STACK_SLOT_PSSCR	(112-24)
-#define STACK_SLOT_PID		(112-32)
-
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -565,7 +580,7 @@ kvmppc_hv_entry:
 	 */
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -112(r1)
+	stdu	r1, -SFS(r1)
 
 	/* Save R1 in the PACA */
 	std	r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_TIDR
 	mfspr	r6, SPRN_PSSCR
 	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_TID(r1)
 	std	r6, STACK_SLOT_PSSCR(r1)
 	std	r7, STACK_SLOT_PID(r1)
+	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_CIABR
+	mfspr	r6, SPRN_DAWR
+	mfspr	r7, SPRN_DAWRX
+	std	r5, STACK_SLOT_CIABR(r1)
+	std	r6, STACK_SLOT_DAWR(r1)
+	std	r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	EXTEND_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 #ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	 * set by the guest could disrupt the host.
 	 */
 	li	r0, 0
-	mtspr	SPRN_IAMR, r0
-	mtspr	SPRN_CIABR, r0
-	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
 BEGIN_FTR_SECTION
+	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_TCSCR, r0
 	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
 	li	r0, 1
@@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
+	mtspr	SPRN_UAMOR, r6
 
 	/* Switch DSCR back to host value */
 	mfspr	r8, SPRN_DSCR
@@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* Restore host values of some registers */
 BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_CIABR(r1)
+	ld	r6, STACK_SLOT_DAWR(r1)
+	ld	r7, STACK_SLOT_DAWRX(r1)
+	mtspr	SPRN_CIABR, r5
+	mtspr	SPRN_DAWR, r6
+	mtspr	SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
 	ld	r7, STACK_SLOT_PID(r1)
+	ld	r8, STACK_SLOT_IAMR(r1)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_PID, r7
+	mtspr	SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
 
-	ld	r0, 112+PPC_LR_STKOFF(r1)
-	addi	r1, r1, 112
+	ld	r0, SFS+PPC_LR_STKOFF(r1)
+	addi	r1, r1, SFS
 	mtlr	r0
 	blr
 
@@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	mfspr	r3, SPRN_DEC
 	mfspr	r4, SPRN_HDEC
 	mftb	r5
-	cmpw	r3, r4
+	extsw	r3, r3
+	EXTEND_HDEC(r4)
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi  = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+			 PERF_SAMPLE_REGS_ABI_NONE;
 }
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 			if (va)
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid);
+					mmio_invalidate_va(npu, address, pid,
+							flush);
 			else
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid);
+					mmio_invalidate_pid(npu, pid, flush);
 
 			/*
 			 * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final addess == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..aeac013968f2 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
 /* Free guarded storage control block for current */
 void exit_thread_gs(void);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
         (task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e545ffe5155a..8e622bb52f7a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
-		diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
 		break;
 	case REIPL_METHOD_CCW_DIAG:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		if (MACHINE_IS_LPAR)
-			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
-		else
-			diag308(DIAG308_LOAD_CLEAR, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RW_DIAG:
 		diag308(DIAG308_SET, reipl_block_fcp);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 999d7154bbdc..bb32b8618bf6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct stack_frame *sf, *low, *high;
-
-	if (!tsk || !task_stack_page(tsk))
-		return 0;
-	low = task_stack_page(tsk);
-	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) tsk->thread.ksp;
-	if (sf <= low || sf > high)
-		return 0;
-	sf = (struct stack_frame *) sf->back_chain;
-	if (sf <= low || sf > high)
-		return 0;
-	return sf->gprs[8];
-}
-
 extern void kernel_thread_starter(void);
 
 /*
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..3b297fa3aa67 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 	ptr = asce.origin * 4096;
 	if (asce.r) {
 		*fake = 1;
+		ptr = 0;
 		asce.dt = ASCE_TYPE_REGION1;
 	}
 	switch (asce.dt) {
 	case ASCE_TYPE_REGION1:
-		if (vaddr.rfx01 > asce.tl && !asce.r)
+		if (vaddr.rfx01 > asce.tl && !*fake)
 			return PGM_REGION_FIRST_TRANS;
 		break;
 	case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region1_table_entry rfte;
 
 		if (*fake) {
-			/* offset in 16EB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+			ptr += (unsigned long) vaddr.rfx << 53;
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
@@ -1036,8 +1036,7 @@ shadow_r2t:
 		union region2_table_entry rste;
 
 		if (*fake) {
-			/* offset in 8PB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+			ptr += (unsigned long) vaddr.rsx << 42;
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
@@ -1064,8 +1063,7 @@ shadow_r3t:
 		union region3_table_entry rtte;
 
 		if (*fake) {
-			/* offset in 4TB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+			ptr += (unsigned long) vaddr.rtx << 31;
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
@@ -1101,8 +1099,7 @@ shadow_sgt:
 		union segment_table_entry ste;
 
 		if (*fake) {
-			/* offset in 2G guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+			ptr += (unsigned long) vaddr.sx << 20;
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index d9a922d8711b..299274581968 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
  */
 extern void (*cpu_wait)(void);
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern void start_thread(struct pt_regs *regs,
 			unsigned long pc, unsigned long sp);
 extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index eb64d7a677cb..6e20241a1ed4 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
 	return 1;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_pt_regs(tsk)->cp0_epc;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index dd27159819eb..b395e5620c0b 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
 	.current_ds = KERNEL_DS, \
 }
 
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
 /* Do necessary setup to start up a newly executed thread. */
 static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 				    unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index b58ee9018433..f04dc5a43062 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
 #include <linux/types.h>
 #include <asm/fpumacro.h>
 
-/* Return saved PC of a blocked thread. */
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
 
 /* On Uniprocessor, even in RMO processes see TSO semantics */
 #ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b6dac8e980f0..9245f93398c7 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 }
 
 /*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_thread_info(tsk)->kpc;
-}
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(struct task_struct *tsk)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc493e62e..b96104da5bd6 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
 
 #endif
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct thread_info *ti = task_thread_info(tsk);
-	unsigned long ret = 0xdeadbeefUL;
-	
-	if (ti && ti->ksp) {
-		unsigned long *sp;
-		sp = (unsigned long *)(ti->ksp + STACK_BIAS);
-		if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
-		    sp[14]) {
-			unsigned long *fp;
-			fp = (unsigned long *)(sp[14] + STACK_BIAS);
-			if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
-				ret = fp[15];
-		}
-	}
-	return ret;
-}
-
 /* Free current thread data structures etc.. */
 void exit_thread(struct task_struct *tsk)
 {
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 0bc9968b97a1..f71e5206650b 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
 
 extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
 
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t)   ((t)->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* Return initial ksp value for given task. */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 2d1e0dd5bb0b..f6d1a3f747a9 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 static inline void mm_copy_segments(struct mm_struct *from_mm,
 				    struct mm_struct *new_mm)
 {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 64a1fd06f3fd..7b5640117325 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
 	__attribute__((__section__(".data..init_irqstack"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return os_process_pc(userspace_pid[0]);
-}
-
 /* Changed in setup_arch, which is called in early boot */
 static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..110ce8238466 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS)   ] = 0x608,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS)   ] = 0x649,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 
 	bool perm_ok; /* do not check permissions if true */
 	bool ud;	/* inject an #UD if host doesn't support insn */
+	bool tf;	/* TF value before instruction (after for syscall/sysret) */
 
 	bool have_exception;
 	struct x86_exception exception;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba100713924..d5acc27ed1cc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
 #define _ASM_X86_MSHYPER_H
 
 #include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
 #include <asm/hyperv.h>
 
 /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..a28b671f1549 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 					       unsigned long new_sp);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb88428cbf2..3ca198080ea9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct inactive_task_frame *frame =
-		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
-	return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	}
 
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 	return X86EMUL_CONTINUE;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 87d3cb901935..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
 	ctxt->eip = kvm_rip_read(vcpu);
 	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 	return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * rflags is the old, "raw" value of the flags.  The new value has
-	 * not been saved yet.
-	 *
-	 * This is correct even for TF set by the guest, because "the
-	 * processor will not generate this exception after the instruction
-	 * that sets the TF flag".
-	 */
-	if (unlikely(rflags & X86_EFLAGS_TF)) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-						  DR6_RTM;
-			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-			kvm_run->debug.arch.exception = DB_VECTOR;
-			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			*r = EMULATE_USER_EXIT;
-		} else {
-			/*
-			 * "Certain debug exceptions may clear bit 0-3.  The
-			 * remaining contents of the DR6 register are never
-			 * cleared by the processor".
-			 */
-			vcpu->arch.dr6 &= ~15;
-			vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-			kvm_queue_exception(vcpu, DB_VECTOR);
-		}
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		*r = EMULATE_USER_EXIT;
+	} else {
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~15;
+		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+		kvm_queue_exception(vcpu, DB_VECTOR);
 	}
 }
 
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	int r = EMULATE_DONE;
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+	/*
+	 * rflags is the old, "raw" value of the flags.  The new value has
+	 * not been saved yet.
+	 *
+	 * This is correct even for TF set by the guest, because "the
+	 * processor will not generate this exception after the instruction
+	 * that sets the TF flag".
+	 */
+	if (unlikely(rflags & X86_EFLAGS_TF))
+		kvm_vcpu_do_singlestep(vcpu, &r);
 	return r == EMULATE_DONE;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE)
-			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+		if (r == EMULATE_DONE &&
+		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+			kvm_vcpu_do_singlestep(vcpu, &r);
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 			__kvm_set_rflags(vcpu, ctxt->eflags);
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 003eeee3fbc6..30ee8c608853 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
 #define release_segments(mm)	do { } while(0)
 #define forget_segments()	do { } while (0)
 
-#define thread_saved_pc(tsk)	(task_pt_regs(tsk)->pc)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
diff --git a/block/bio.c b/block/bio.c
index 888e7801c638..26b0810fb8ea 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -240,20 +240,21 @@ fallback:
 	return bvl;
 }
 
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio);
 }
+EXPORT_SYMBOL(bio_uninit);
 
 static void bio_free(struct bio *bio)
 {
 	struct bio_set *bs = bio->bi_pool;
 	void *p;
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	if (bs) {
 		bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
 	}
 }
 
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
 void bio_init(struct bio *bio, struct bio_vec *table,
 	      unsigned short max_vecs)
 {
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
 {
 	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	memset(bio, 0, BIO_RESET_BYTES);
 	bio->bi_flags = flags;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..0ded5e846335 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
 		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
 }
 
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_inc(&q->shared_hctx_restart);
+	} else
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return false;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_dec(&q->shared_hctx_restart);
+	} else
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+
+	if (blk_mq_hctx_has_pending(hctx)) {
+		blk_mq_run_hw_queue(hctx, true);
+		return true;
+	}
+
+	return false;
+}
+
 struct request *blk_mq_sched_get_request(struct request_queue *q,
 					 struct bio *bio,
 					 unsigned int op,
@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx)) {
-			blk_mq_run_hw_queue(hctx, true);
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos:    loop cursor.
@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
 	unsigned int i, j;
 
 	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		/*
+		 * If this is 0, then we know that no hardware queues
+		 * have RESTART marked. We're done.
+		 */
+		if (!atomic_read(&queue->shared_hctx_restart))
+			return;
+
 		rcu_read_lock();
 		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
 					   tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..5007edece51a 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96850b1..958cedaff8b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared)
+		if (shared) {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_inc(&q->shared_hctx_restart);
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		else
+		} else {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_dec(&q->shared_hctx_restart);
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+					bool shared)
 {
 	struct request_queue *q;
 
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 8af664f7d27c..be117495eb43 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -877,7 +877,7 @@ static void aead_sock_destruct(struct sock *sk)
 	unsigned int ivlen = crypto_aead_ivsize(
 				crypto_aead_reqtfm(&ctx->aead_req));
 
-	WARN_ON(atomic_read(&sk->sk_refcnt) != 0);
+	WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
 	aead_put_sgl(sk);
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 3a10d7573477..d53162997f32 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev)
 	adev->flags.coherent_dma = cca;
 }
 
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+	bool *is_spi_i2c_slave_p = data;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	/*
+	 * devices that are connected to UART still need to be enumerated to
+	 * platform bus
+	 */
+	if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+		*is_spi_i2c_slave_p = true;
+
+	 /* no need to do more checking */
+	return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+	struct list_head resource_list;
+	bool is_spi_i2c_slave = false;
+
+	INIT_LIST_HEAD(&resource_list);
+	acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+			       &is_spi_i2c_slave);
+	acpi_dev_free_resource_list(&resource_list);
+
+	return is_spi_i2c_slave;
+}
+
 void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 			     int type, unsigned long long sta)
 {
@@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	acpi_bus_get_flags(device);
 	device->flags.match_driver = false;
 	device->flags.initialized = true;
+	device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
 	acpi_device_clear_enumerated(device);
 	device_initialize(&device->dev);
 	dev_set_uevent_suppress(&device->dev, true);
@@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
 	return AE_OK;
 }
 
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
-	bool *is_spi_i2c_slave_p = data;
-
-	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-		return 1;
-
-	/*
-	 * devices that are connected to UART still need to be enumerated to
-	 * platform bus
-	 */
-	if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
-		*is_spi_i2c_slave_p = true;
-
-	 /* no need to do more checking */
-	return -1;
-}
-
 static void acpi_default_enumeration(struct acpi_device *device)
 {
-	struct list_head resource_list;
-	bool is_spi_i2c_slave = false;
-
 	/*
 	 * Do not enumerate SPI/I2C slaves as they will be enumerated by their
 	 * respective parents.
 	 */
-	INIT_LIST_HEAD(&resource_list);
-	acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
-			       &is_spi_i2c_slave);
-	acpi_dev_free_resource_list(&resource_list);
-	if (!is_spi_i2c_slave) {
+	if (!device->flags.spi_i2c_slave) {
 		acpi_create_platform_device(device, NULL);
 		acpi_device_set_enumerated(device);
 	} else {
@@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device)
 		return;
 
 	device->flags.match_driver = true;
-	if (ret > 0) {
+	if (ret > 0 && !device->flags.spi_i2c_slave) {
 		acpi_device_set_enumerated(device);
 		goto ok;
 	}
@@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device)
 	if (ret < 0)
 		return;
 
-	if (device->pnp.type.platform_id)
-		acpi_default_enumeration(device);
-	else
+	if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
 		acpi_device_set_enumerated(device);
+	else
+		acpi_default_enumeration(device);
 
  ok:
 	list_for_each_entry(child, &device->children, node)
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 7584ae1ded85..f0433adcd8fc 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e)
 		else {
 		    dev_kfree_skb_any(entry->skb);
 		}
-#if 1
-		/* race fixed by the above incarnation mechanism, but... */
-		if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) {
-		    atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0);
-		}
-#endif
+
 		/* check error condition */
 		if (*entry->status & STATUS_ERROR)
 		    atomic_inc(&vcc->stats->tx_err);
@@ -1130,13 +1125,9 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp
 	return -ENOMEM;
     }
 
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
     vcc->push(vcc, skb);
     atomic_inc(&vcc->stats->rx);
 
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
     return 0;
 }
 
@@ -1572,7 +1563,6 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
     unsigned long           flags;
 
     ASSERT(vcc);
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
     ASSERT(fore200e);
     ASSERT(fore200e_vcc);
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 461da2bce8ef..37ee21c5a5ca 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -2395,7 +2395,7 @@ he_close(struct atm_vcc *vcc)
 		 * TBRQ, the host issues the close command to the adapter.
 		 */
 
-		while (((tx_inuse = atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
+		while (((tx_inuse = refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
 		       (retry < MAX_RETRY)) {
 			msleep(sleep);
 			if (sleep < 250)
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 4e64de380bda..60bacba03d17 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -724,7 +724,7 @@ push_on_scq(struct idt77252_dev *card, struct vc_map *vc, struct sk_buff *skb)
 		struct sock *sk = sk_atm(vcc);
 
 		vc->estimator->cells += (skb->len + 47) / 48;
-		if (atomic_read(&sk->sk_wmem_alloc) >
+		if (refcount_read(&sk->sk_wmem_alloc) >
 		    (sk->sk_sndbuf >> 1)) {
 			u32 cps = vc->estimator->maxcps;
 
@@ -2009,7 +2009,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags)
 		atomic_inc(&vcc->stats->tx_err);
 		return -ENOMEM;
 	}
-	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 
 	skb_put_data(skb, cell, 52);
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 726c32e35db9..0e824091a12f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
 	unsigned long timeout;
 	int ret;
 
-	xen_blkif_get(blkif);
-
 	set_freezable();
 	while (!kthread_should_stop()) {
 		if (try_to_freeze())
@@ -665,7 +663,6 @@ purge_gnt_list:
 		print_stats(ring);
 
 	ring->xenblkd = NULL;
-	xen_blkif_put(blkif);
 
 	return 0;
 }
@@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 static void make_response(struct xen_blkif_ring *ring, u64 id,
 			  unsigned short op, int st)
 {
-	struct blkif_response  resp;
+	struct blkif_response *resp;
 	unsigned long     flags;
 	union blkif_back_rings *blk_rings;
 	int notify;
 
-	resp.id        = id;
-	resp.operation = op;
-	resp.status    = st;
-
 	spin_lock_irqsave(&ring->blk_ring_lock, flags);
 	blk_rings = &ring->blk_rings;
 	/* Place on the response ring for the relevant domain. */
 	switch (ring->blkif->blk_protocol) {
 	case BLKIF_PROTOCOL_NATIVE:
-		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->native,
+					 blk_rings->native.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_32:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+					 blk_rings->x86_32.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_64:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+					 blk_rings->x86_64.rsp_prod_pvt);
 		break;
 	default:
 		BUG();
 	}
+
+	resp->id        = id;
+	resp->operation = op;
+	resp->status    = st;
+
 	blk_rings->common.rsp_prod_pvt++;
 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
 	spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index dea61f6ab8cb..ecb35fe8ca8d 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
 struct blkif_common_request {
 	char dummy;
 };
-struct blkif_common_response {
-	char dummy;
-};
+
+/* i386 protocol version */
 
 struct blkif_x86_32_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
 	} u;
 } __attribute__((__packed__));
 
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
-	uint64_t        id;              /* copied from request */
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
-};
-#pragma pack(pop)
 /* x86_64 protocol version */
 
 struct blkif_x86_64_request_rw {
@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
 	} u;
 } __attribute__((__packed__));
 
-struct blkif_x86_64_response {
-	uint64_t       __attribute__((__aligned__(8))) id;
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
-};
-
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
-		  struct blkif_common_response);
+		  struct blkif_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
-		  struct blkif_x86_32_response);
+		  struct blkif_response __packed);
 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
-		  struct blkif_x86_64_response);
+		  struct blkif_response);
 
 union blkif_back_rings {
 	struct blkif_back_ring        native;
@@ -281,6 +266,7 @@ struct xen_blkif_ring {
 
 	wait_queue_head_t	wq;
 	atomic_t		inflight;
+	bool			active;
 	/* One thread per blkif ring. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 1f3dfaa54d87..792da683e70d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
 		init_waitqueue_head(&ring->shutdown_wq);
 		ring->blkif = blkif;
 		ring->st_print = jiffies;
-		xen_blkif_get(blkif);
+		ring->active = true;
 	}
 
 	return 0;
@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 		struct xen_blkif_ring *ring = &blkif->rings[r];
 		unsigned int i = 0;
 
+		if (!ring->active)
+			continue;
+
 		if (ring->xenblkd) {
 			kthread_stop(ring->xenblkd);
 			wake_up(&ring->shutdown_wq);
-			ring->xenblkd = NULL;
 		}
 
 		/* The above kthread_stop() guarantees that at this point we
@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 		BUG_ON(ring->free_pages_num != 0);
 		BUG_ON(ring->persistent_gnt_c != 0);
 		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-		xen_blkif_put(blkif);
+		ring->active = false;
 	}
 	blkif->nr_ring_pages = 0;
 	/*
@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-
-	xen_blkif_disconnect(blkif);
+	WARN_ON(xen_blkif_disconnect(blkif));
 	xen_vbd_free(&blkif->vbd);
+	kfree(blkif->be->mode);
+	kfree(blkif->be);
 
 	/* Make sure everything is drained before shutting down */
 	kmem_cache_free(xen_blkif_cachep, blkif);
@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		xen_blkif_put(be->blkif);
 	}
 
-	kfree(be->mode);
-	kfree(be);
 	return 0;
 }
 
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 24f8c4e93f4e..9ab6cfbb831d 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -295,6 +295,7 @@ static const struct {
 	{ 0x410e, "BCM43341B0"	},	/* 002.001.014 */
 	{ 0x4406, "BCM4324B3"	},	/* 002.004.006 */
 	{ 0x610c, "BCM4354"	},	/* 003.001.012 */
+	{ 0x2209, "BCM43430A1"  },	/* 001.002.009 */
 	{ }
 };
 
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index d2e9e2d1b014..6a662d0161b4 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -419,8 +419,7 @@ finalize:
 	if (err)
 		return err;
 
-	err = bcm_request_irq(bcm);
-	if (!err)
+	if (!bcm_request_irq(bcm))
 		err = bcm_setup_sleep(hu);
 
 	return err;
@@ -657,6 +656,15 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
 		},
 		.driver_data = &acpi_active_low,
 	},
+	{
+		.ident = "Asus T100CHI",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR,
+					"ASUSTeK COMPUTER INC."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
+		},
+		.driver_data = &acpi_active_low,
+	},
 	{	/* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
 		.ident = "Lenovo ThinkPad 8",
 		.matches = {
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 7de0edc0ff8c..aea930101dd2 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -31,7 +31,7 @@
 
 #include "hci_uart.h"
 
-struct serdev_device_ops hci_serdev_client_ops;
+static struct serdev_device_ops hci_serdev_client_ops;
 
 static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type)
 {
@@ -268,7 +268,7 @@ static int hci_uart_receive_buf(struct serdev_device *serdev, const u8 *data,
 	return count;
 }
 
-struct serdev_device_ops hci_serdev_client_ops = {
+static struct serdev_device_ops hci_serdev_client_ops = {
 	.receive_buf = hci_uart_receive_buf,
 	.write_wakeup = hci_uart_write_wakeup,
 };
diff --git a/drivers/char/random.c b/drivers/char/random.c
index e870f329db88..01a260f67437 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp, size_t len)
 		p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
 		cp++; crng_init_cnt++; len--;
 	}
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
 		invalidate_batched_entropy();
 		crng_init = 1;
 		wake_up_interruptible(&crng_init_wait);
 		pr_notice("random: fast init done\n");
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	return 1;
 }
 
@@ -841,6 +841,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	}
 	memzero_explicit(&buf, sizeof(buf));
 	crng->init_time = jiffies;
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng == &primary_crng && crng_init < 2) {
 		invalidate_batched_entropy();
 		crng_init = 2;
@@ -848,7 +849,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 		wake_up_interruptible(&crng_init_wait);
 		pr_notice("random: crng init done\n");
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
 static inline void crng_wait_ready(void)
@@ -2041,8 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
 {
 	u64 ret;
-	bool use_lock = crng_init < 2;
-	unsigned long flags;
+	bool use_lock = READ_ONCE(crng_init) < 2;
+	unsigned long flags = 0;
 	struct batched_entropy *batch;
 
 #if BITS_PER_LONG == 64
@@ -2073,8 +2073,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
 u32 get_random_u32(void)
 {
 	u32 ret;
-	bool use_lock = crng_init < 2;
-	unsigned long flags;
+	bool use_lock = READ_ONCE(crng_init) < 2;
+	unsigned long flags = 0;
 	struct batched_entropy *batch;
 
 	if (arch_get_random_int(&ret))
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 4bed671e490e..8b5c30062d99 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
 		return 0;
 	}
 
-	rate = readl_relaxed(frame + CNTFRQ);
+	rate = readl_relaxed(base + CNTFRQ);
 
-	iounmap(frame);
+	iounmap(base);
 
 	return rate;
 }
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index 44e5e951583b..8e64b8460f11 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 2e9c830ae1cd..c4656c4d44a6 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -12,6 +12,7 @@
 
 #include <linux/clk.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 5104b6398139..c83ea68be792 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 	u32 set;
 
 	if (!of_device_is_compatible(mvchip->chip.of_node,
-				     "marvell,armada-370-xp-gpio"))
+				     "marvell,armada-370-gpio"))
 		return 0;
 
 	if (IS_ERR(mvchip->clk))
@@ -852,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = {
 		.data	    = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
 	},
 	{
-		.compatible = "marvell,armada-370-xp-gpio",
+		.compatible = "marvell,armada-370-gpio",
 		.data	    = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
 	},
 	{
@@ -1128,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
 						 mvchip);
 	}
 
-	/* Armada 370/XP has simple PWM support for GPIO lines */
+	/* Some MVEBU SoCs have simple PWM support for GPIO lines */
 	if (IS_ENABLED(CONFIG_PWM))
 		return mvebu_pwm_probe(pdev, mvchip, id);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1cf78f4dd339..1e8e1123ddf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 			DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
 				 adev->clock.default_dispclk / 100);
 			adev->clock.default_dispclk = 60000;
+		} else if (adev->clock.default_dispclk <= 60000) {
+			DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+				 adev->clock.default_dispclk / 100);
+			adev->clock.default_dispclk = 62500;
 		}
 		adev->clock.dp_extclk =
 			le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f2d705e6a75a..ab6b0d0febab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -449,6 +449,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	/* Vega 10 */
 	{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8c9bc75a9c2d..8a0818b23ea4 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+	ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
 	memset(&args, 0, sizeof(args));
 
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
 {
 	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+	ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
 	memset(&args, 0, sizeof(args));
 
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 9f847615ac74..48ca2457df8c 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	if (!connector)
 		return -ENOENT;
 
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-	encoder = drm_connector_get_encoder(connector);
-	if (encoder)
-		out_resp->encoder_id = encoder->base.id;
-	else
-		out_resp->encoder_id = 0;
-
-	ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
-			(uint32_t __user *)(unsigned long)(out_resp->props_ptr),
-			(uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
-			&out_resp->count_props);
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	if (ret)
-		goto out_unref;
-
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
 		if (connector->encoder_ids[i] != 0)
 			encoders_count++;
@@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 				if (put_user(connector->encoder_ids[i],
 					     encoder_ptr + copied)) {
 					ret = -EFAULT;
-					goto out_unref;
+					goto out;
 				}
 				copied++;
 			}
@@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 			if (copy_to_user(mode_ptr + copied,
 					 &u_mode, sizeof(u_mode))) {
 				ret = -EFAULT;
+				mutex_unlock(&dev->mode_config.mutex);
+
 				goto out;
 			}
 			copied++;
 		}
 	}
 	out_resp->count_modes = mode_count;
-out:
 	mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+	encoder = drm_connector_get_encoder(connector);
+	if (encoder)
+		out_resp->encoder_id = encoder->base.id;
+	else
+		out_resp->encoder_id = 0;
+
+	/* Only grab properties after probing, to make sure EDID and other
+	 * properties reflect the latest status. */
+	ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+			(uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+			(uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+			&out_resp->count_props);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
 	drm_connector_put(connector);
 
 	return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index c4a091e87426..e437fba1209d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -106,9 +106,10 @@ struct etnaviv_gem_submit {
 	struct etnaviv_gpu *gpu;
 	struct ww_acquire_ctx ticket;
 	struct dma_fence *fence;
+	u32 flags;
 	unsigned int nr_bos;
 	struct etnaviv_gem_submit_bo bos[0];
-	u32 flags;
+	/* No new members here, the previous one is variable-length! */
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index de80ee1b71df..1013765274da 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -172,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
-		bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+		bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
 
 		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
 						 explicit);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e511744e..4bd1467c17b1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data)
 	struct file_stats *stats = data;
 	struct i915_vma *vma;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	stats->count++;
 	stats->total += obj->base.size;
 	if (!obj->bind_count)
@@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		struct drm_i915_gem_request *request;
 		struct task_struct *task;
 
+		mutex_lock(&dev->struct_mutex);
+
 		memset(&stats, 0, sizeof(stats));
 		stats.file_priv = file->driver_priv;
 		spin_lock(&file->table_lock);
@@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		 * still alive (e.g. get_pid(current) => fork() => exit()).
 		 * Therefore, we need to protect this ->comm access using RCU.
 		 */
-		mutex_lock(&dev->struct_mutex);
 		request = list_first_entry_or_null(&file_priv->mm.request_list,
 						   struct drm_i915_gem_request,
 						   client_link);
@@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 				PIDTYPE_PID);
 		print_file_stats(m, task ? task->comm : "<unknown>", stats);
 		rcu_read_unlock();
+
 		mutex_unlock(&dev->struct_mutex);
 	}
 	mutex_unlock(&dev->filelist_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 462031cbd77f..615f0a855222 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct page *page;
 	unsigned long last_pfn = 0;	/* suppress gcc warning */
 	unsigned int max_segment;
+	gfp_t noreclaim;
 	int ret;
-	gfp_t gfp;
 
 	/* Assert that the object is not currently in any GPU domain. As it
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ rebuild_st:
 	 * Fail silently without starting the shrinker
 	 */
 	mapping = obj->base.filp->f_mapping;
-	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	noreclaim = mapping_gfp_constraint(mapping,
+					   ~(__GFP_IO | __GFP_RECLAIM));
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
 	sg = st->sgl;
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
-		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		if (unlikely(IS_ERR(page))) {
-			i915_gem_shrink(dev_priv,
-					page_count,
-					I915_SHRINK_BOUND |
-					I915_SHRINK_UNBOUND |
-					I915_SHRINK_PURGEABLE);
+		const unsigned int shrink[] = {
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		}
-		if (unlikely(IS_ERR(page))) {
-			gfp_t reclaim;
+			if (likely(!IS_ERR(page)))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+			cond_resched();
 
 			/* We've tried hard to allocate the memory by reaping
 			 * our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ rebuild_st:
 			 * defer the oom here by reporting the ENOMEM back
 			 * to userspace.
 			 */
-			reclaim = mapping_gfp_mask(mapping);
-			reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
-			page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto err_sg;
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/* Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want the future __GFP_MAYFAIL.
+				 */
 			}
-		}
+		} while (1);
+
 		if (!i ||
 		    sg->length >= max_segment ||
 		    page_to_pfn(page) != last_pfn + 1) {
@@ -4222,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 
 	mapping = obj->base.filp->f_mapping;
 	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
 	i915_gem_object_init(obj, &i915_gem_object_ops);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3e59c8ef27b..9ad13eeed904 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -546,11 +546,12 @@ repeat:
 }
 
 static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
 				   struct eb_vmas *eb,
 				   struct drm_i915_gem_relocation_entry *reloc,
 				   struct reloc_cache *cache)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct drm_gem_object *target_obj;
 	struct drm_i915_gem_object *target_i915_obj;
@@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
+	/*
+	 * If we write into the object, we need to force the synchronisation
+	 * barrier, either with an asynchronous clflush or if we executed the
+	 * patching using the GPU (though that should be serialised by the
+	 * timeline). To be completely sure, and since we are required to
+	 * do relocations we are already stalling, disable the user's opt
+	 * of our synchronisation.
+	 */
+	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
 	ret = relocate_entry(obj, reloc, cache, target_offset);
 	if (ret)
 		return ret;
@@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+			ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
 			if (ret)
 				goto out;
 
@@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 
 	reloc_cache_init(&cache, eb->i915);
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+		ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
 		if (ret)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ddbc9499775..a74d0ac737cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..ab5140ba108d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1aba47024656..f066e2d785f5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 				break;
 		}
 
+		if (!ret) {
+			ret = i915_gem_active_retire(&vma->last_fence,
+						     &vma->vm->i915->drm.struct_mutex);
+		}
+
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 96b0b01677e2..9106ea32b048 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
 static void skylake_pfit_enable(struct intel_crtc *crtc);
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
 static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+					 struct drm_modeset_acquire_ctx *ctx);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 
 struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	int i, ret;
 
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, ctx);
 	i915_redisable_vga(to_i915(dev));
 
 	if (!state)
@@ -5825,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 		intel_update_watermarks(intel_crtc);
 }
 
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+					struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5855,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 		return;
 	}
 
-	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+	state->acquire_ctx = ctx;
 
 	/* Everything's already locked, -EDEADLK can't happen. */
 	crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -15030,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
 	intel_setup_outputs(dev_priv);
 
 	drm_modeset_lock_all(dev);
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
 	drm_modeset_unlock_all(dev);
 
 	for_each_intel_crtc(dev, crtc) {
@@ -15067,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
 	return 0;
 }
 
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
-	struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
 	int ret;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
@@ -15145,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
 		(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
 }
 
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15191,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		plane = crtc->plane;
 		crtc->base.primary->state->visible = true;
 		crtc->plane = !plane;
-		intel_crtc_disable_noatomic(&crtc->base);
+		intel_crtc_disable_noatomic(&crtc->base, ctx);
 		crtc->plane = plane;
 	}
 
@@ -15201,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		 * resume. Force-enable the pipe to fix this, the update_dpms
 		 * call below we restore the pipe to the right state, but leave
 		 * the required bits on. */
-		intel_enable_pipe_a(dev);
+		intel_enable_pipe_a(dev, ctx);
 	}
 
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
 	if (crtc->active && !intel_crtc_has_encoders(crtc))
-		intel_crtc_disable_noatomic(&crtc->base);
+		intel_crtc_disable_noatomic(&crtc->base, ctx);
 
 	if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
 		/*
@@ -15505,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
  * and sanitizes it to the current state
  */
 static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum pipe pipe;
@@ -15525,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
 	for_each_pipe(dev_priv, pipe) {
 		crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 
-		intel_sanitize_crtc(crtc);
+		intel_sanitize_crtc(crtc, ctx);
 		intel_dump_pipe_config(crtc, crtc->config,
 				       "[setup_hw_state]");
 	}
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
index 6532e226db29..40ba3134545e 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
 	struct intel_panel *panel = &connector->panel;
 
-	intel_dp_aux_enable_backlight(connector);
-
 	if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
 		panel->backlight.max = 0xFFFF;
 	else
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dac4e003c1f3..62f44d3e7c43 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
-			ce->ring->head = ce->ring->tail = 0;
-			intel_ring_update_space(ce->ring);
+			intel_ring_reset(ce->ring, 0);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66a2b8b83972..513a0f4b469b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
 	i915_gem_request_submit(request);
 
-	assert_ring_tail_valid(request->ring, request->tail);
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ err:
 	return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!list_empty(&ring->request_list));
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->tail);
+
 	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Restart from the beginning of the rings for convenience */
 	for_each_engine(engine, dev_priv, id)
-		engine->buffer->head = engine->buffer->tail;
+		intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		unsigned space;
 
 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->tail,
+		space = __intel_ring_space(target->postfix, ring->emit,
 					   ring->size);
 		if (space >= bytes)
 			break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->tail;
-	int remain_usable = ring->effective_size - ring->tail;
+	int remain_actual = ring->size - ring->emit;
+	int remain_usable = ring->effective_size - ring->emit;
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
 	if (unlikely(need_wrap)) {
 		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->tail, 0, remain_actual);
-		ring->tail = 0;
+		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		ring->emit = 0;
 		ring->space -= remain_actual;
 	}
 
-	GEM_BUG_ON(ring->tail > ring->size - bytes);
-	cs = ring->vaddr + ring->tail;
-	ring->tail += bytes;
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	cs = ring->vaddr + ring->emit;
+	ring->emit += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);
 
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
 	int num_dwords =
-		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	u32 *cs;
 
 	if (num_dwords == 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..f7144fe09613 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -145,6 +145,7 @@ struct intel_ring {
 
 	u32 head;
 	u32 tail;
+	u32 emit;
 
 	int space;
 	int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly order, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_gem_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 432480ff9d22..3178ba0c537c 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev)
 	    rdev->pdev->subsystem_vendor == 0x103c &&
 	    rdev->pdev->subsystem_device == 0x280a)
 		return;
+	/* quirk for rs4xx Toshiba Sattellite L20-183 latop to make it resume
+	 * - it hangs on resume inside the dynclk 1 table.
+	 */
+	if (rdev->family == CHIP_RS400 &&
+	    rdev->pdev->subsystem_vendor == 0x1179 &&
+	    rdev->pdev->subsystem_device == 0xff31)
+	        return;
 
 	/* DYN CLK 1 */
 	table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6ecf42783d4b..0a6444d72000 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=51381
 	 */
 	{ PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+	/* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+	 */
+	{ PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
 	/* macbook pro 8.2 */
 	{ PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
 	{ 0, 0, 0, 0, 0 },
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 13db8a2851ed..1f013d45c9e9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man)
 	list_for_each_entry_safe(entry, next, &man->list, head)
 		vmw_cmdbuf_res_free(man, entry);
 
+	drm_ht_remove(&man->resources);
 	kfree(man);
 }
 
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 95ed17183e73..54a47b40546f 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
 		 * the first read operation, otherwise the first read cost
 		 * one extra clock cycle.
 		 */
-		temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+		temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 		temp |= I2CR_MTX;
-		writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+		imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 	}
 	msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
 
@@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
 				 * the first read operation, otherwise the first read cost
 				 * one extra clock cycle.
 				 */
-				temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+				temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 				temp |= I2CR_MTX;
-				writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+				imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 			}
 		} else if (i == (msgs->len - 2)) {
 			dev_dbg(&i2c_imx->adapter.dev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9f7e18612322..dc2f59e33971 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -223,8 +223,8 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
 	return 0;
 }
 
-static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
-				 struct ib_port_attr *props)
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+				struct ib_port_attr *props)
 {
 	struct mlx5_ib_dev *dev = to_mdev(device);
 	struct mlx5_core_dev *mdev = dev->mdev;
@@ -232,12 +232,14 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	enum ib_mtu ndev_ib_mtu;
 	u16 qkey_viol_cntr;
 	u32 eth_prot_oper;
+	int err;
 
 	/* Possible bad flows are checked before filling out props so in case
 	 * of an error it will still be zeroed out.
 	 */
-	if (mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num))
-		return;
+	err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
+	if (err)
+		return err;
 
 	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
 				 &props->active_width);
@@ -258,7 +260,7 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 
 	ndev = mlx5_ib_get_netdev(device, port_num);
 	if (!ndev)
-		return;
+		return 0;
 
 	if (mlx5_lag_is_active(dev->mdev)) {
 		rcu_read_lock();
@@ -281,75 +283,49 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	dev_put(ndev);
 
 	props->active_mtu	= min(props->max_mtu, ndev_ib_mtu);
+	return 0;
 }
 
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
-				     const struct ib_gid_attr *attr,
-				     void *mlx5_addr)
+static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+			 unsigned int index, const union ib_gid *gid,
+			 const struct ib_gid_attr *attr)
 {
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
-	char *mlx5_addr_l3_addr	= MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
-					       source_l3_address);
-	void *mlx5_addr_mac	= MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
-					       source_mac_47_32);
-
-	if (!gid)
-		return;
+	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+	u8 roce_version = 0;
+	u8 roce_l3_type = 0;
+	bool vlan = false;
+	u8 mac[ETH_ALEN];
+	u16 vlan_id = 0;
 
-	ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+	if (gid) {
+		gid_type = attr->gid_type;
+		ether_addr_copy(mac, attr->ndev->dev_addr);
 
-	if (is_vlan_dev(attr->ndev)) {
-		MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
-		MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+		if (is_vlan_dev(attr->ndev)) {
+			vlan = true;
+			vlan_id = vlan_dev_vlan_id(attr->ndev);
+		}
 	}
 
-	switch (attr->gid_type) {
+	switch (gid_type) {
 	case IB_GID_TYPE_IB:
-		MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+		roce_version = MLX5_ROCE_VERSION_1;
 		break;
 	case IB_GID_TYPE_ROCE_UDP_ENCAP:
-		MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+		roce_version = MLX5_ROCE_VERSION_2;
+		if (ipv6_addr_v4mapped((void *)gid))
+			roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+		else
+			roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
 		break;
 
 	default:
-		WARN_ON(true);
+		mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
 	}
 
-	if (attr->gid_type != IB_GID_TYPE_IB) {
-		if (ipv6_addr_v4mapped((void *)gid))
-			MLX5_SET_RA(mlx5_addr, roce_l3_type,
-				    MLX5_ROCE_L3_TYPE_IPV4);
-		else
-			MLX5_SET_RA(mlx5_addr, roce_l3_type,
-				    MLX5_ROCE_L3_TYPE_IPV6);
-	}
-
-	if ((attr->gid_type == IB_GID_TYPE_IB) ||
-	    !ipv6_addr_v4mapped((void *)gid))
-		memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
-	else
-		memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
-}
-
-static int set_roce_addr(struct ib_device *device, u8 port_num,
-			 unsigned int index,
-			 const union ib_gid *gid,
-			 const struct ib_gid_attr *attr)
-{
-	struct mlx5_ib_dev *dev = to_mdev(device);
-	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
-	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
-	enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
-
-	if (ll != IB_LINK_LAYER_ETHERNET)
-		return -EINVAL;
-
-	ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
-
-	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
-	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-	return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
+				      roce_l3_type, gid->raw, mac, vlan,
+				      vlan_id);
 }
 
 static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
@@ -357,13 +333,13 @@ static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
 			   const struct ib_gid_attr *attr,
 			   __always_unused void **context)
 {
-	return set_roce_addr(device, port_num, index, gid, attr);
+	return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
 }
 
 static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
 			   unsigned int index, __always_unused void **context)
 {
-	return set_roce_addr(device, port_num, index, NULL, NULL);
+	return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
 }
 
 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -978,20 +954,31 @@ out:
 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 		       struct ib_port_attr *props)
 {
+	unsigned int count;
+	int ret;
+
 	switch (mlx5_get_vport_access_method(ibdev)) {
 	case MLX5_VPORT_ACCESS_METHOD_MAD:
-		return mlx5_query_mad_ifc_port(ibdev, port, props);
+		ret = mlx5_query_mad_ifc_port(ibdev, port, props);
+		break;
 
 	case MLX5_VPORT_ACCESS_METHOD_HCA:
-		return mlx5_query_hca_port(ibdev, port, props);
+		ret = mlx5_query_hca_port(ibdev, port, props);
+		break;
 
 	case MLX5_VPORT_ACCESS_METHOD_NIC:
-		mlx5_query_port_roce(ibdev, port, props);
-		return 0;
+		ret = mlx5_query_port_roce(ibdev, port, props);
+		break;
 
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
+	}
+
+	if (!ret && props) {
+		count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
+		props->gid_tbl_len -= count;
 	}
+	return ret;
 }
 
 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 30b256a2c54e..de4025deaa4a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -742,7 +742,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
 
 	if (type == NES_TIMER_TYPE_SEND) {
 		new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
-		atomic_inc(&new_send->skb->users);
+		refcount_inc(&new_send->skb->users);
 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
 		cm_node->send_entry = new_send;
 		add_ref_cm_node(cm_node);
@@ -924,7 +924,7 @@ static void nes_cm_timer_tick(unsigned long pass)
 						  flags);
 				break;
 			}
-			atomic_inc(&send_entry->skb->users);
+			refcount_inc(&send_entry->skb->users);
 			cm_packets_retrans++;
 			nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
 				  "for node %p, jiffies = %lu, time to send = "
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index e37d37273182..f600f3a7a3c6 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 
 	if (!btns_desc) {
 		dev_err(dev, "ACPI Button Descriptors not found\n");
-		return ERR_PTR(-ENODEV);
+		button_info = ERR_PTR(-ENODEV);
+		goto out;
 	}
 
 	/* The first package describes the collection */
@@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 	}
 	if (collection_uid == -1) {
 		dev_err(dev, "Invalid Button Collection Descriptor\n");
-		return ERR_PTR(-ENODEV);
+		button_info = ERR_PTR(-ENODEV);
+		goto out;
 	}
 
 	/* There are package.count - 1 buttons + 1 terminating empty entry */
 	button_info = devm_kcalloc(dev, btns_desc->package.count,
 				   sizeof(*button_info), GFP_KERNEL);
-	if (!button_info)
-		return ERR_PTR(-ENOMEM);
+	if (!button_info) {
+		button_info = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 
 	/* Parse the button descriptors */
 	for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
 		if (soc_button_parse_btn_desc(dev,
 					      &btns_desc->package.elements[i],
 					      collection_uid,
-					      &button_info[btn]))
-			return ERR_PTR(-ENODEV);
+					      &button_info[btn])) {
+			button_info = ERR_PTR(-ENODEV);
+			goto out;
+		}
 	}
 
+out:
+	kfree(buf.pointer);
 	return button_info;
 }
 
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index dea63e2db3e6..f5206e2c767e 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -31,9 +31,6 @@
 #define F54_GET_REPORT          1
 #define F54_FORCE_CAL           2
 
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN			27
-
 /* F54 capabilities */
 #define F54_CAP_BASELINE	(1 << 2)
 #define F54_CAP_IMAGE8		(1 << 3)
@@ -95,7 +92,6 @@ struct rmi_f54_reports {
 struct f54_data {
 	struct rmi_function *fn;
 
-	u8 qry[F54_QUERY_LEN];
 	u8 num_rx_electrodes;
 	u8 num_tx_electrodes;
 	u8 capabilities;
@@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn)
 {
 	int error;
 	struct f54_data *f54;
+	u8 buf[6];
 
 	f54 = dev_get_drvdata(&fn->dev);
 
 	error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
-			       &f54->qry, sizeof(f54->qry));
+			       buf, sizeof(buf));
 	if (error) {
 		dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
 			__func__);
 		return error;
 	}
 
-	f54->num_rx_electrodes = f54->qry[0];
-	f54->num_tx_electrodes = f54->qry[1];
-	f54->capabilities = f54->qry[2];
-	f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
-	f54->family = f54->qry[5];
+	f54->num_rx_electrodes = buf[0];
+	f54->num_tx_electrodes = buf[1];
+	f54->capabilities = buf[2];
+	f54->clock_rate = buf[3] | (buf[4] << 8);
+	f54->family = buf[5];
 
 	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
 		f54->num_rx_electrodes);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 09720d950686..f932a83b4990 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
 		},
 	},
+	{
+		/* Fujitsu UH554 laptop */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index eb7fbe159963..929f8558bf1c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
 }
 
 #ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
 {
 	unsigned int hi, hi2, lo;
 
@@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void)
 	return bits;
 }
 
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
 {
 	if (mips_cm_is64) {
 		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt)
 	}
 }
 
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
 {
 	unsigned long flags;
 
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 99e5f9751e8b..c5603d1a07d6 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -155,7 +155,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	copied = skb->len + MISDN_HEADER_LEN;
 	if (len < copied) {
 		if (flags & MSG_PEEK)
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 		else
 			skb_queue_head(&sk->sk_receive_queue, skb);
 		return -ENOSPC;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 7910bfe50da4..93b181088168 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1105,10 +1105,13 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
 {
 	struct bio *bio;
-	spin_lock_irq(&ic->endio_wait.lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ic->endio_wait.lock, flags);
 	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 	bio_list_add(&ic->flush_bio_list, bio);
-	spin_unlock_irq(&ic->endio_wait.lock);
+	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
 	queue_work(ic->commit_wq, &ic->commit_work);
 }
 
@@ -3040,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		ti->error = "The device is too small";
 		goto bad;
 	}
+	if (ti->len > ic->provided_data_sectors) {
+		r = -EINVAL;
+		ti->error = "Not enough provided sectors for requested mapping size";
+		goto bad;
+	}
 
 	if (!buffer_sectors)
 		buffer_sectors = 1;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e502466d..8d5ca30f6551 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -317,8 +317,8 @@ static void do_region(int op, int op_flags, unsigned region,
 	else if (op == REQ_OP_WRITE_SAME)
 		special_cmd_max_sectors = q->limits.max_write_same_sectors;
 	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
-	     op == REQ_OP_WRITE_SAME)  &&
-	    special_cmd_max_sectors == 0) {
+	     op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+		atomic_inc(&io->count);
 		dec_count(io, region, -EOPNOTSUPP);
 		return;
 	}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7d893228c40f..b4b75dad816a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
 	/********************************************************************
 	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
 	 *
-	 * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
 	 */
 
 	__le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb->layout = cpu_to_le32(mddev->layout);
 	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
 
+	/********************************************************************
+	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+	 *
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+	 */
 	sb->new_level = cpu_to_le32(mddev->new_level);
 	sb->new_layout = cpu_to_le32(mddev->new_layout);
 	sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
 	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
 
 	if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
-		/* Retrieve device size stored in superblock to be prepared for shrink */
-		rdev->sectors = le64_to_cpu(sb->sectors);
+		/*
+		 * Retrieve rdev size stored in superblock to be prepared for shrink.
+		 * Check extended superblock members are present otherwise the size
+		 * will not be set!
+		 */
+		if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+			rdev->sectors = le64_to_cpu(sb->sectors);
+
 		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
 		if (rdev->recovery_offset == MaxSector)
 			set_bit(In_sync, &rdev->flags);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index e61c45047c25..4da8858856fb 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 
 struct dm_raid1_bio_record {
 	struct mirror *m;
+	/* if details->bi_bdev == NULL, details were not saved */
 	struct dm_bio_details details;
 	region_t write_region;
 };
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	struct dm_raid1_bio_record *bio_record =
 	  dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
+	bio_record->details.bi_bdev = NULL;
+
 	if (rw == WRITE) {
 		/* Save region for mirror_end_io() handler */
 		bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1256,12 +1259,22 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 	}
 
 	if (error == -EOPNOTSUPP)
-		return error;
+		goto out;
 
 	if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-		return error;
+		goto out;
 
 	if (unlikely(error)) {
+		if (!bio_record->details.bi_bdev) {
+			/*
+			 * There wasn't enough memory to record necessary
+			 * information for a retry or there was no other
+			 * mirror in-sync.
+			 */
+			DMERR_LIMIT("Mirror read failed.");
+			return -EIO;
+		}
+
 		m = bio_record->m;
 
 		DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,6 +1290,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 			bd = &bio_record->details;
 
 			dm_bio_restore(bd, bio);
+			bio_record->details.bi_bdev = NULL;
 			bio->bi_error = 0;
 
 			queue_bio(ms, bio, rw);
@@ -1285,6 +1299,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		DMERR("All replicated volumes dead, failing I/O");
 	}
 
+out:
+	bio_record->details.bi_bdev = NULL;
+
 	return error;
 }
 
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50daed08..28808e5ec0fd 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1094,6 +1094,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 		return;
 	}
 
+	/*
+	 * Increment the unmapped blocks.  This prevents a race between the
+	 * passdown io and reallocation of freed blocks.
+	 */
+	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+	if (r) {
+		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+		bio_io_error(m->bio);
+		cell_defer_no_holder(tc, m->cell);
+		mempool_free(m, pool->mapping_pool);
+		return;
+	}
+
 	discard_parent = bio_alloc(GFP_NOIO, 1);
 	if (!discard_parent) {
 		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1127,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 			end_discard(&op, r);
 		}
 	}
-
-	/*
-	 * Increment the unmapped blocks.  This prevents a race between the
-	 * passdown io and reallocation of freed blocks.
-	 */
-	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
-	if (r) {
-		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
-		bio_io_error(m->bio);
-		cell_defer_no_holder(tc, m->cell);
-		mempool_free(m, pool->mapping_pool);
-		return;
-	}
 }
 
 static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 75488e65cd96..8d46e3ad9529 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona,
 	int ret;
 
 	ret = regmap_read_poll_timeout(arizona->regmap,
-				       ARIZONA_INTERRUPT_RAW_STATUS_5, val,
-				       ((val & mask) == target),
+				       reg, val, ((val & mask) == target),
 				       ARIZONA_REG_POLL_DELAY_US,
 				       timeout_ms * 1000);
 	if (ret)
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index 20bfb9ba83ea..cbb4f8566bbe 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -269,6 +269,10 @@ struct arcnet_local {
 
 	struct timer_list	timer;
 
+	struct net_device *dev;
+	int reply_status;
+	struct tasklet_struct reply_tasklet;
+
 	/*
 	 * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of
 	 * which can be used for either sending or receiving.  The new dynamic
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 62ee439d5882..fcfccbb3d9a2 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -51,6 +51,7 @@
 #include <net/arp.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/errqueue.h>
 
 #include <linux/leds.h>
 
@@ -391,6 +392,52 @@ static void arcnet_timer(unsigned long data)
 	}
 }
 
+static void arcnet_reply_tasklet(unsigned long data)
+{
+	struct arcnet_local *lp = (struct arcnet_local *)data;
+
+	struct sk_buff *ackskb, *skb;
+	struct sock_exterr_skb *serr;
+	struct sock *sk;
+	int ret;
+
+	local_irq_disable();
+	skb = lp->outgoing.skb;
+	if (!skb || !skb->sk) {
+		local_irq_enable();
+		return;
+	}
+
+	sock_hold(skb->sk);
+	sk = skb->sk;
+	ackskb = skb_clone_sk(skb);
+	sock_put(skb->sk);
+
+	if (!ackskb) {
+		local_irq_enable();
+		return;
+	}
+
+	serr = SKB_EXT_ERR(ackskb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+	serr->ee.ee_data = skb_shinfo(skb)->tskey;
+	serr->ee.ee_info = lp->reply_status;
+
+	/* finally erasing outgoing skb */
+	dev_kfree_skb(lp->outgoing.skb);
+	lp->outgoing.skb = NULL;
+
+	ackskb->dev = lp->dev;
+
+	ret = sock_queue_err_skb(sk, ackskb);
+	if (ret)
+		kfree_skb(ackskb);
+
+	local_irq_enable();
+};
+
 struct net_device *alloc_arcdev(const char *name)
 {
 	struct net_device *dev;
@@ -401,6 +448,7 @@ struct net_device *alloc_arcdev(const char *name)
 	if (dev) {
 		struct arcnet_local *lp = netdev_priv(dev);
 
+		lp->dev = dev;
 		spin_lock_init(&lp->lock);
 		init_timer(&lp->timer);
 		lp->timer.data = (unsigned long) dev;
@@ -436,6 +484,9 @@ int arcnet_open(struct net_device *dev)
 		arc_cont(D_PROTO, "\n");
 	}
 
+	tasklet_init(&lp->reply_tasklet, arcnet_reply_tasklet,
+		     (unsigned long)lp);
+
 	arc_printk(D_INIT, dev, "arcnet_open: resetting card.\n");
 
 	/* try to put the card in a defined state - if it fails the first
@@ -527,6 +578,8 @@ int arcnet_close(struct net_device *dev)
 	netif_stop_queue(dev);
 	netif_carrier_off(dev);
 
+	tasklet_kill(&lp->reply_tasklet);
+
 	/* flush TX and disable RX */
 	lp->hw.intmask(dev, 0);
 	lp->hw.command(dev, NOTXcmd);	/* stop transmit */
@@ -635,13 +688,13 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb,
 		txbuf = -1;
 
 	if (txbuf != -1) {
+		lp->outgoing.skb = skb;
 		if (proto->prepare_tx(dev, pkt, skb->len, txbuf) &&
 		    !proto->ack_tx) {
 			/* done right away and we don't want to acknowledge
 			 *  the package later - forget about it now
 			 */
 			dev->stats.tx_bytes += skb->len;
-			dev_kfree_skb(skb);
 		} else {
 			/* do it the 'split' way */
 			lp->outgoing.proto = proto;
@@ -756,6 +809,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	struct net_device *dev = dev_id;
 	struct arcnet_local *lp;
 	int recbuf, status, diagstatus, didsomething, boguscount;
+	unsigned long flags;
 	int retval = IRQ_NONE;
 
 	arc_printk(D_DURING, dev, "\n");
@@ -765,7 +819,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	lp = netdev_priv(dev);
 	BUG_ON(!lp);
 
-	spin_lock(&lp->lock);
+	spin_lock_irqsave(&lp->lock, flags);
 
 	/* RESET flag was enabled - if device is not running, we must
 	 * clear it right away (but nothing else).
@@ -774,7 +828,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 		if (lp->hw.status(dev) & RESETflag)
 			lp->hw.command(dev, CFLAGScmd | RESETclear);
 		lp->hw.intmask(dev, 0);
-		spin_unlock(&lp->lock);
+		spin_unlock_irqrestore(&lp->lock, flags);
 		return retval;
 	}
 
@@ -842,8 +896,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 
 		/* a transmit finished, and we're interested in it. */
 		if ((status & lp->intmask & TXFREEflag) || lp->timed_out) {
+			int ackstatus;
 			lp->intmask &= ~(TXFREEflag | EXCNAKflag);
 
+			if (status & TXACKflag)
+				ackstatus = 2;
+			else if (lp->excnak_pending)
+				ackstatus = 1;
+			else
+				ackstatus = 0;
+
 			arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n",
 				   status);
 
@@ -866,18 +928,11 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 
 				if (lp->outgoing.proto &&
 				    lp->outgoing.proto->ack_tx) {
-					int ackstatus;
-
-					if (status & TXACKflag)
-						ackstatus = 2;
-					else if (lp->excnak_pending)
-						ackstatus = 1;
-					else
-						ackstatus = 0;
-
 					lp->outgoing.proto
 						->ack_tx(dev, ackstatus);
 				}
+				lp->reply_status = ackstatus;
+				tasklet_hi_schedule(&lp->reply_tasklet);
 			}
 			if (lp->cur_tx != -1)
 				release_arcbuf(dev, lp->cur_tx);
@@ -998,7 +1053,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	udelay(1);
 	lp->hw.intmask(dev, lp->intmask);
 
-	spin_unlock(&lp->lock);
+	spin_unlock_irqrestore(&lp->lock, flags);
 	return retval;
 }
 EXPORT_SYMBOL(arcnet_interrupt);
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index a80f4eb9262d..b780be6f41ff 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked)
 	ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
 	ackpkt->soft.cap.mes.ack = acked;
 
-	arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+	arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
 		   *((int *)&ackpkt->soft.cap.cookie[0]));
 
 	ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 239de38fbd6a..2d956cb59d06 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -93,6 +93,27 @@ static void led_recon_set(struct led_classdev *led_cdev,
 	outb(!!value, priv->misc + ci->leds[card->index].red);
 }
 
+static ssize_t backplane_mode_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct net_device *net_dev = to_net_dev(dev);
+	struct arcnet_local *lp = netdev_priv(net_dev);
+
+	return sprintf(buf, "%s\n", lp->backplane ? "true" : "false");
+}
+static DEVICE_ATTR_RO(backplane_mode);
+
+static struct attribute *com20020_state_attrs[] = {
+	&dev_attr_backplane_mode.attr,
+	NULL,
+};
+
+static struct attribute_group com20020_state_group = {
+	.name = NULL,
+	.attrs = com20020_state_attrs,
+};
+
 static void com20020pci_remove(struct pci_dev *pdev);
 
 static int com20020pci_probe(struct pci_dev *pdev,
@@ -135,6 +156,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
 	for (i = 0; i < ci->devcount; i++) {
 		struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
 		struct com20020_dev *card;
+		int dev_id_mask = 0xf;
 
 		dev = alloc_arcdev(device);
 		if (!dev) {
@@ -166,8 +188,10 @@ static int com20020pci_probe(struct pci_dev *pdev,
 		arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
 		arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
 
+		SET_NETDEV_DEV(dev, &pdev->dev);
 		dev->base_addr = ioaddr;
 		dev->dev_addr[0] = node;
+		dev->sysfs_groups[0] = &com20020_state_group;
 		dev->irq = pdev->irq;
 		lp->card_name = "PCI COM20020";
 		lp->card_flags = ci->flags;
@@ -177,10 +201,15 @@ static int com20020pci_probe(struct pci_dev *pdev,
 		lp->timeout = timeout;
 		lp->hw.owner = THIS_MODULE;
 
+		lp->backplane = (inb(priv->misc) >> (2 + i)) & 0x1;
+
+		if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15))
+			lp->backplane = 1;
+
 		/* Get the dev_id from the PLX rotary coder */
 		if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
-			dev->dev_id = 0xc;
-		dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+			dev_id_mask = 0x3;
+		dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
 
 		snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
 
@@ -361,6 +390,31 @@ static struct com20020_pci_card_info card_info_eae_ma1 = {
 	.flags = ARC_CAN_10MBIT,
 };
 
+static struct com20020_pci_card_info card_info_eae_fb2 = {
+	.name = "EAE PLX-PCI FB2",
+	.devcount = 1,
+	.chan_map_tbl = {
+		{
+			.bar = 2,
+			.offset = 0x00,
+			.size = 0x08,
+		},
+	},
+	.misc_map = {
+		.bar = 2,
+		.offset = 0x10,
+		.size = 0x04,
+	},
+	.leds = {
+		{
+			.green = 0x0,
+			.red = 0x1,
+		},
+	},
+	.rotary = 0x0,
+	.flags = ARC_CAN_10MBIT,
+};
+
 static const struct pci_device_id com20020pci_id_table[] = {
 	{
 		0x1571, 0xa001,
@@ -507,6 +561,12 @@ static const struct pci_device_id com20020pci_id_table[] = {
 		(kernel_ulong_t)&card_info_eae_ma1
 	},
 	{
+		0x10B5, 0x9050,
+		0x10B5, 0x3294,
+		0, 0,
+		(kernel_ulong_t)&card_info_eae_fb2
+	},
+	{
 		0x14BA, 0x6000,
 		PCI_ANY_ID, PCI_ANY_ID,
 		0, 0,
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c
index 13d9ad4b3f5c..78043a9c5981 100644
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared)
 		return -ENODEV;
 	}
 
-	dev->base_addr = ioaddr;
-
 	arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
 		   lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
 
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 8ca683396fcc..a12d603d41c6 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -464,7 +464,7 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val)
 
 /* Searches for a value in opt's values[] table which matches the flagmask */
 static const struct bond_opt_value *bond_opt_get_flags(const struct bond_option *opt,
-						 u32 flagmask)
+						       u32 flagmask)
 {
 	int i;
 
@@ -744,14 +744,14 @@ static int bond_option_mode_set(struct bonding *bond,
 				const struct bond_opt_value *newval)
 {
 	if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) {
-		netdev_info(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
-			    newval->string);
+		netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
+			   newval->string);
 		/* disable arp monitoring */
 		bond->params.arp_interval = 0;
 		/* set miimon to default value */
 		bond->params.miimon = BOND_DEFAULT_MIIMON;
-		netdev_info(bond->dev, "Setting MII monitoring interval to %d\n",
-			    bond->params.miimon);
+		netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
+			   bond->params.miimon);
 	}
 
 	/* don't cache arp_validate between modes */
@@ -794,7 +794,7 @@ static int bond_option_active_slave_set(struct bonding *bond,
 	block_netpoll_tx();
 	/* check to see if we are clearing active */
 	if (!slave_dev) {
-		netdev_info(bond->dev, "Clearing current active slave\n");
+		netdev_dbg(bond->dev, "Clearing current active slave\n");
 		RCU_INIT_POINTER(bond->curr_active_slave, NULL);
 		bond_select_active_slave(bond);
 	} else {
@@ -805,13 +805,13 @@ static int bond_option_active_slave_set(struct bonding *bond,
 
 		if (new_active == old_active) {
 			/* do nothing */
-			netdev_info(bond->dev, "%s is already the current active slave\n",
-				    new_active->dev->name);
+			netdev_dbg(bond->dev, "%s is already the current active slave\n",
+				   new_active->dev->name);
 		} else {
 			if (old_active && (new_active->link == BOND_LINK_UP) &&
 			    bond_slave_is_up(new_active)) {
-				netdev_info(bond->dev, "Setting %s as active slave\n",
-					    new_active->dev->name);
+				netdev_dbg(bond->dev, "Setting %s as active slave\n",
+					   new_active->dev->name);
 				bond_change_active_slave(bond, new_active);
 			} else {
 				netdev_err(bond->dev, "Could not set %s as active slave; either %s is down or the link is down\n",
@@ -833,17 +833,17 @@ static int bond_option_active_slave_set(struct bonding *bond,
 static int bond_option_miimon_set(struct bonding *bond,
 				  const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting MII monitoring interval to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting MII monitoring interval to %llu\n",
+		   newval->value);
 	bond->params.miimon = newval->value;
 	if (bond->params.updelay)
-		netdev_info(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n",
-			bond->params.updelay * bond->params.miimon);
+		netdev_dbg(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n",
+			   bond->params.updelay * bond->params.miimon);
 	if (bond->params.downdelay)
-		netdev_info(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
-			    bond->params.downdelay * bond->params.miimon);
+		netdev_dbg(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
+			   bond->params.downdelay * bond->params.miimon);
 	if (newval->value && bond->params.arp_interval) {
-		netdev_info(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
+		netdev_dbg(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
 		bond->params.arp_interval = 0;
 		if (bond->params.arp_validate)
 			bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
@@ -885,8 +885,8 @@ static int bond_option_updelay_set(struct bonding *bond,
 			    bond->params.miimon);
 	}
 	bond->params.updelay = value / bond->params.miimon;
-	netdev_info(bond->dev, "Setting up delay to %d\n",
-		    bond->params.updelay * bond->params.miimon);
+	netdev_dbg(bond->dev, "Setting up delay to %d\n",
+		   bond->params.updelay * bond->params.miimon);
 
 	return 0;
 }
@@ -907,8 +907,8 @@ static int bond_option_downdelay_set(struct bonding *bond,
 			    bond->params.miimon);
 	}
 	bond->params.downdelay = value / bond->params.miimon;
-	netdev_info(bond->dev, "Setting down delay to %d\n",
-		    bond->params.downdelay * bond->params.miimon);
+	netdev_dbg(bond->dev, "Setting down delay to %d\n",
+		   bond->params.downdelay * bond->params.miimon);
 
 	return 0;
 }
@@ -916,8 +916,8 @@ static int bond_option_downdelay_set(struct bonding *bond,
 static int bond_option_use_carrier_set(struct bonding *bond,
 				       const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting use_carrier to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting use_carrier to %llu\n",
+		   newval->value);
 	bond->params.use_carrier = newval->value;
 
 	return 0;
@@ -930,16 +930,16 @@ static int bond_option_use_carrier_set(struct bonding *bond,
 static int bond_option_arp_interval_set(struct bonding *bond,
 					const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting ARP monitoring interval to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting ARP monitoring interval to %llu\n",
+		   newval->value);
 	bond->params.arp_interval = newval->value;
 	if (newval->value) {
 		if (bond->params.miimon) {
-			netdev_info(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n");
+			netdev_dbg(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n");
 			bond->params.miimon = 0;
 		}
 		if (!bond->params.arp_targets[0])
-			netdev_info(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n");
+			netdev_dbg(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n");
 	}
 	if (bond->dev->flags & IFF_UP) {
 		/* If the interface is up, we may need to fire off
@@ -1000,7 +1000,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
 		return -EINVAL;
 	}
 
-	netdev_info(bond->dev, "Adding ARP target %pI4\n", &target);
+	netdev_dbg(bond->dev, "Adding ARP target %pI4\n", &target);
 
 	_bond_options_arp_ip_target_set(bond, ind, target, jiffies);
 
@@ -1036,7 +1036,7 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
 	if (ind == 0 && !targets[1] && bond->params.arp_interval)
 		netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n");
 
-	netdev_info(bond->dev, "Removing ARP target %pI4\n", &target);
+	netdev_dbg(bond->dev, "Removing ARP target %pI4\n", &target);
 
 	bond_for_each_slave(bond, slave, iter) {
 		targets_rx = slave->target_last_arp_rx;
@@ -1088,8 +1088,8 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
 static int bond_option_arp_validate_set(struct bonding *bond,
 					const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting arp_validate to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
+		   newval->string, newval->value);
 
 	if (bond->dev->flags & IFF_UP) {
 		if (!newval->value)
@@ -1105,8 +1105,8 @@ static int bond_option_arp_validate_set(struct bonding *bond,
 static int bond_option_arp_all_targets_set(struct bonding *bond,
 					   const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting arp_all_targets to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting arp_all_targets to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.arp_all_targets = newval->value;
 
 	return 0;
@@ -1126,7 +1126,7 @@ static int bond_option_primary_set(struct bonding *bond,
 		*p = '\0';
 	/* check to see if we are clearing primary */
 	if (!strlen(primary)) {
-		netdev_info(bond->dev, "Setting primary slave to None\n");
+		netdev_dbg(bond->dev, "Setting primary slave to None\n");
 		RCU_INIT_POINTER(bond->primary_slave, NULL);
 		memset(bond->params.primary, 0, sizeof(bond->params.primary));
 		bond_select_active_slave(bond);
@@ -1135,8 +1135,8 @@ static int bond_option_primary_set(struct bonding *bond,
 
 	bond_for_each_slave(bond, slave, iter) {
 		if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) {
-			netdev_info(bond->dev, "Setting %s as primary slave\n",
-				    slave->dev->name);
+			netdev_dbg(bond->dev, "Setting %s as primary slave\n",
+				   slave->dev->name);
 			rcu_assign_pointer(bond->primary_slave, slave);
 			strcpy(bond->params.primary, slave->dev->name);
 			bond_select_active_slave(bond);
@@ -1145,15 +1145,15 @@ static int bond_option_primary_set(struct bonding *bond,
 	}
 
 	if (rtnl_dereference(bond->primary_slave)) {
-		netdev_info(bond->dev, "Setting primary slave to None\n");
+		netdev_dbg(bond->dev, "Setting primary slave to None\n");
 		RCU_INIT_POINTER(bond->primary_slave, NULL);
 		bond_select_active_slave(bond);
 	}
 	strncpy(bond->params.primary, primary, IFNAMSIZ);
 	bond->params.primary[IFNAMSIZ - 1] = 0;
 
-	netdev_info(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n",
-		    primary, bond->dev->name);
+	netdev_dbg(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n",
+		   primary, bond->dev->name);
 
 out:
 	unblock_netpoll_tx();
@@ -1164,8 +1164,8 @@ out:
 static int bond_option_primary_reselect_set(struct bonding *bond,
 					    const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting primary_reselect to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting primary_reselect to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.primary_reselect = newval->value;
 
 	block_netpoll_tx();
@@ -1178,8 +1178,8 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
 static int bond_option_fail_over_mac_set(struct bonding *bond,
 					 const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting fail_over_mac to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting fail_over_mac to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.fail_over_mac = newval->value;
 
 	return 0;
@@ -1188,8 +1188,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
 static int bond_option_xmit_hash_policy_set(struct bonding *bond,
 					    const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.xmit_policy = newval->value;
 
 	return 0;
@@ -1198,8 +1198,8 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond,
 static int bond_option_resend_igmp_set(struct bonding *bond,
 				       const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting resend_igmp to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting resend_igmp to %llu\n",
+		   newval->value);
 	bond->params.resend_igmp = newval->value;
 
 	return 0;
@@ -1237,8 +1237,8 @@ static int bond_option_all_slaves_active_set(struct bonding *bond,
 static int bond_option_min_links_set(struct bonding *bond,
 				     const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting min links value to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting min links value to %llu\n",
+		   newval->value);
 	bond->params.min_links = newval->value;
 	bond_set_carrier(bond);
 
@@ -1256,6 +1256,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
 static int bond_option_pps_set(struct bonding *bond,
 			       const struct bond_opt_value *newval)
 {
+	netdev_dbg(bond->dev, "Setting packets per slave to %llu\n",
+		   newval->value);
 	bond->params.packets_per_slave = newval->value;
 	if (newval->value > 0) {
 		bond->params.reciprocal_packets_per_slave =
@@ -1274,8 +1276,8 @@ static int bond_option_pps_set(struct bonding *bond,
 static int bond_option_lacp_rate_set(struct bonding *bond,
 				     const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting LACP rate to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting LACP rate to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.lacp_fast = newval->value;
 	bond_3ad_update_lacp_rate(bond);
 
@@ -1285,8 +1287,8 @@ static int bond_option_lacp_rate_set(struct bonding *bond,
 static int bond_option_ad_select_set(struct bonding *bond,
 				     const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting ad_select to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting ad_select to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.ad_select = newval->value;
 
 	return 0;
@@ -1347,7 +1349,7 @@ out:
 	return ret;
 
 err_no_cmd:
-	netdev_info(bond->dev, "invalid input for queue_id set\n");
+	netdev_dbg(bond->dev, "invalid input for queue_id set\n");
 	ret = -EPERM;
 	goto out;
 
@@ -1369,20 +1371,20 @@ static int bond_option_slaves_set(struct bonding *bond,
 
 	dev = __dev_get_by_name(dev_net(bond->dev), ifname);
 	if (!dev) {
-		netdev_info(bond->dev, "interface %s does not exist!\n",
-			    ifname);
+		netdev_dbg(bond->dev, "interface %s does not exist!\n",
+			   ifname);
 		ret = -ENODEV;
 		goto out;
 	}
 
 	switch (command[0]) {
 	case '+':
-		netdev_info(bond->dev, "Adding slave %s\n", dev->name);
+		netdev_dbg(bond->dev, "Adding slave %s\n", dev->name);
 		ret = bond_enslave(bond->dev, dev);
 		break;
 
 	case '-':
-		netdev_info(bond->dev, "Removing slave %s\n", dev->name);
+		netdev_dbg(bond->dev, "Removing slave %s\n", dev->name);
 		ret = bond_release(bond->dev, dev);
 		break;
 
@@ -1402,8 +1404,8 @@ err_no_cmd:
 static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
 					  const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting dynamic-lb to %s (%llu)\n",
-		    newval->string, newval->value);
+	netdev_dbg(bond->dev, "Setting dynamic-lb to %s (%llu)\n",
+		   newval->string, newval->value);
 	bond->params.tlb_dynamic_lb = newval->value;
 
 	return 0;
@@ -1412,8 +1414,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
 static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
 					     const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
+		   newval->value);
 
 	bond->params.ad_actor_sys_prio = newval->value;
 	bond_3ad_update_ad_actor_settings(bond);
@@ -1442,7 +1444,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond,
 	if (!is_valid_ether_addr(mac))
 		goto err;
 
-	netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac);
+	netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac);
 	ether_addr_copy(bond->params.ad_actor_system, mac);
 	bond_3ad_update_ad_actor_settings(bond);
 
@@ -1456,8 +1458,8 @@ err:
 static int bond_option_ad_user_port_key_set(struct bonding *bond,
 					    const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n",
-		    newval->value);
+	netdev_dbg(bond->dev, "Setting ad_user_port_key to %llu\n",
+		   newval->value);
 
 	bond->params.ad_user_port_key = newval->value;
 	return 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 127adbeefb10..9795419aac2d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -123,38 +123,13 @@
 #define DMA_ISR				0x3008
 #define DMA_AXIARCR			0x3010
 #define DMA_AXIAWCR			0x3018
+#define DMA_AXIAWARCR			0x301c
 #define DMA_DSR0			0x3020
 #define DMA_DSR1			0x3024
+#define DMA_TXEDMACR			0x3040
+#define DMA_RXEDMACR			0x3044
 
 /* DMA register entry bit positions and sizes */
-#define DMA_AXIARCR_DRC_INDEX		0
-#define DMA_AXIARCR_DRC_WIDTH		4
-#define DMA_AXIARCR_DRD_INDEX		4
-#define DMA_AXIARCR_DRD_WIDTH		2
-#define DMA_AXIARCR_TEC_INDEX		8
-#define DMA_AXIARCR_TEC_WIDTH		4
-#define DMA_AXIARCR_TED_INDEX		12
-#define DMA_AXIARCR_TED_WIDTH		2
-#define DMA_AXIARCR_THC_INDEX		16
-#define DMA_AXIARCR_THC_WIDTH		4
-#define DMA_AXIARCR_THD_INDEX		20
-#define DMA_AXIARCR_THD_WIDTH		2
-#define DMA_AXIAWCR_DWC_INDEX		0
-#define DMA_AXIAWCR_DWC_WIDTH		4
-#define DMA_AXIAWCR_DWD_INDEX		4
-#define DMA_AXIAWCR_DWD_WIDTH		2
-#define DMA_AXIAWCR_RPC_INDEX		8
-#define DMA_AXIAWCR_RPC_WIDTH		4
-#define DMA_AXIAWCR_RPD_INDEX		12
-#define DMA_AXIAWCR_RPD_WIDTH		2
-#define DMA_AXIAWCR_RHC_INDEX		16
-#define DMA_AXIAWCR_RHC_WIDTH		4
-#define DMA_AXIAWCR_RHD_INDEX		20
-#define DMA_AXIAWCR_RHD_WIDTH		2
-#define DMA_AXIAWCR_TDC_INDEX		24
-#define DMA_AXIAWCR_TDC_WIDTH		4
-#define DMA_AXIAWCR_TDD_INDEX		28
-#define DMA_AXIAWCR_TDD_WIDTH		2
 #define DMA_ISR_MACIS_INDEX		17
 #define DMA_ISR_MACIS_WIDTH		1
 #define DMA_ISR_MTLIS_INDEX		16
@@ -163,14 +138,31 @@
 #define DMA_MR_INTM_WIDTH		2
 #define DMA_MR_SWR_INDEX		0
 #define DMA_MR_SWR_WIDTH		1
+#define DMA_RXEDMACR_RDPS_INDEX		0
+#define DMA_RXEDMACR_RDPS_WIDTH		3
+#define DMA_SBMR_AAL_INDEX		12
+#define DMA_SBMR_AAL_WIDTH		1
 #define DMA_SBMR_EAME_INDEX		11
 #define DMA_SBMR_EAME_WIDTH		1
-#define DMA_SBMR_BLEN_256_INDEX		7
-#define DMA_SBMR_BLEN_256_WIDTH		1
+#define DMA_SBMR_BLEN_INDEX		1
+#define DMA_SBMR_BLEN_WIDTH		7
+#define DMA_SBMR_RD_OSR_LMT_INDEX	16
+#define DMA_SBMR_RD_OSR_LMT_WIDTH	6
 #define DMA_SBMR_UNDEF_INDEX		0
 #define DMA_SBMR_UNDEF_WIDTH		1
+#define DMA_SBMR_WR_OSR_LMT_INDEX	24
+#define DMA_SBMR_WR_OSR_LMT_WIDTH	6
+#define DMA_TXEDMACR_TDPS_INDEX		0
+#define DMA_TXEDMACR_TDPS_WIDTH		3
 
 /* DMA register values */
+#define DMA_SBMR_BLEN_256		256
+#define DMA_SBMR_BLEN_128		128
+#define DMA_SBMR_BLEN_64		64
+#define DMA_SBMR_BLEN_32		32
+#define DMA_SBMR_BLEN_16		16
+#define DMA_SBMR_BLEN_8			8
+#define DMA_SBMR_BLEN_4			4
 #define DMA_DSR_RPS_WIDTH		4
 #define DMA_DSR_TPS_WIDTH		4
 #define DMA_DSR_Q_WIDTH			(DMA_DSR_RPS_WIDTH + DMA_DSR_TPS_WIDTH)
@@ -959,6 +951,7 @@
 #define XP_DRIVER_INT_RO		0x0064
 #define XP_DRIVER_SCRATCH_0		0x0068
 #define XP_DRIVER_SCRATCH_1		0x006c
+#define XP_INT_REISSUE_EN		0x0074
 #define XP_INT_EN			0x0078
 #define XP_I2C_MUTEX			0x0080
 #define XP_MDIO_MUTEX			0x0084
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 0a98c369df20..45d92304068e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -176,8 +176,8 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
 
 	DBGPR("-->xgbe_free_ring_resources\n");
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		xgbe_free_ring(pdata, channel->tx_ring);
 		xgbe_free_ring(pdata, channel->rx_ring);
 	}
@@ -185,34 +185,60 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
 	DBGPR("<--xgbe_free_ring_resources\n");
 }
 
+static void *xgbe_alloc_node(size_t size, int node)
+{
+	void *mem;
+
+	mem = kzalloc_node(size, GFP_KERNEL, node);
+	if (!mem)
+		mem = kzalloc(size, GFP_KERNEL);
+
+	return mem;
+}
+
+static void *xgbe_dma_alloc_node(struct device *dev, size_t size,
+				 dma_addr_t *dma, int node)
+{
+	void *mem;
+	int cur_node = dev_to_node(dev);
+
+	set_dev_node(dev, node);
+	mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+	set_dev_node(dev, cur_node);
+
+	if (!mem)
+		mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+
+	return mem;
+}
+
 static int xgbe_init_ring(struct xgbe_prv_data *pdata,
 			  struct xgbe_ring *ring, unsigned int rdesc_count)
 {
-	DBGPR("-->xgbe_init_ring\n");
+	size_t size;
 
 	if (!ring)
 		return 0;
 
 	/* Descriptors */
+	size = rdesc_count * sizeof(struct xgbe_ring_desc);
+
 	ring->rdesc_count = rdesc_count;
-	ring->rdesc = dma_alloc_coherent(pdata->dev,
-					 (sizeof(struct xgbe_ring_desc) *
-					  rdesc_count), &ring->rdesc_dma,
-					 GFP_KERNEL);
+	ring->rdesc = xgbe_dma_alloc_node(pdata->dev, size, &ring->rdesc_dma,
+					  ring->node);
 	if (!ring->rdesc)
 		return -ENOMEM;
 
 	/* Descriptor information */
-	ring->rdata = kcalloc(rdesc_count, sizeof(struct xgbe_ring_data),
-			      GFP_KERNEL);
+	size = rdesc_count * sizeof(struct xgbe_ring_data);
+
+	ring->rdata = xgbe_alloc_node(size, ring->node);
 	if (!ring->rdata)
 		return -ENOMEM;
 
 	netif_dbg(pdata, drv, pdata->netdev,
-		  "rdesc=%p, rdesc_dma=%pad, rdata=%p\n",
-		  ring->rdesc, &ring->rdesc_dma, ring->rdata);
-
-	DBGPR("<--xgbe_init_ring\n");
+		  "rdesc=%p, rdesc_dma=%pad, rdata=%p, node=%d\n",
+		  ring->rdesc, &ring->rdesc_dma, ring->rdata, ring->node);
 
 	return 0;
 }
@@ -223,10 +249,8 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
 	unsigned int i;
 	int ret;
 
-	DBGPR("-->xgbe_alloc_ring_resources\n");
-
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		netif_dbg(pdata, drv, pdata->netdev, "%s - Tx ring:\n",
 			  channel->name);
 
@@ -250,8 +274,6 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
 		}
 	}
 
-	DBGPR("<--xgbe_alloc_ring_resources\n");
-
 	return 0;
 
 err_ring:
@@ -261,21 +283,33 @@ err_ring:
 }
 
 static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
-			    struct xgbe_page_alloc *pa, gfp_t gfp, int order)
+			    struct xgbe_page_alloc *pa, int alloc_order,
+			    int node)
 {
 	struct page *pages = NULL;
 	dma_addr_t pages_dma;
-	int ret;
+	gfp_t gfp;
+	int order, ret;
+
+again:
+	order = alloc_order;
 
 	/* Try to obtain pages, decreasing order if necessary */
-	gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+	gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
 	while (order >= 0) {
-		pages = alloc_pages(gfp, order);
+		pages = alloc_pages_node(node, gfp, order);
 		if (pages)
 			break;
 
 		order--;
 	}
+
+	/* If we couldn't get local pages, try getting from anywhere */
+	if (!pages && (node != NUMA_NO_NODE)) {
+		node = NUMA_NO_NODE;
+		goto again;
+	}
+
 	if (!pages)
 		return -ENOMEM;
 
@@ -327,14 +361,14 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 	int ret;
 
 	if (!ring->rx_hdr_pa.pages) {
-		ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0);
+		ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, 0, ring->node);
 		if (ret)
 			return ret;
 	}
 
 	if (!ring->rx_buf_pa.pages) {
-		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC,
-				       PAGE_ALLOC_COSTLY_ORDER);
+		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa,
+				       PAGE_ALLOC_COSTLY_ORDER, ring->node);
 		if (ret)
 			return ret;
 	}
@@ -362,8 +396,8 @@ static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata)
 
 	DBGPR("-->xgbe_wrapper_tx_descriptor_init\n");
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		ring = channel->tx_ring;
 		if (!ring)
 			break;
@@ -403,8 +437,8 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
 
 	DBGPR("-->xgbe_wrapper_rx_descriptor_init\n");
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		ring = channel->rx_ring;
 		if (!ring)
 			break;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 24a687ce4388..06f953e1e9b2 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -174,58 +174,30 @@ static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata,
 	return ret;
 }
 
-static int xgbe_config_pblx8(struct xgbe_prv_data *pdata)
+static int xgbe_config_pbl_val(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
+	unsigned int pblx8, pbl;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++)
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, PBLX8,
-				       pdata->pblx8);
-
-	return 0;
-}
-
-static int xgbe_get_tx_pbl_val(struct xgbe_prv_data *pdata)
-{
-	return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_TCR, PBL);
-}
-
-static int xgbe_config_tx_pbl_val(struct xgbe_prv_data *pdata)
-{
-	struct xgbe_channel *channel;
-	unsigned int i;
-
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
-			break;
+	pblx8 = DMA_PBL_X8_DISABLE;
+	pbl = pdata->pbl;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, PBL,
-				       pdata->tx_pbl);
+	if (pdata->pbl > 32) {
+		pblx8 = DMA_PBL_X8_ENABLE;
+		pbl >>= 3;
 	}
 
-	return 0;
-}
-
-static int xgbe_get_rx_pbl_val(struct xgbe_prv_data *pdata)
-{
-	return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_RCR, PBL);
-}
-
-static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
-{
-	struct xgbe_channel *channel;
-	unsigned int i;
+	for (i = 0; i < pdata->channel_count; i++) {
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, PBLX8,
+				       pblx8);
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
-			break;
+		if (pdata->channel[i]->tx_ring)
+			XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR,
+					       PBL, pbl);
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, PBL,
-				       pdata->rx_pbl);
+		if (pdata->channel[i]->rx_ring)
+			XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR,
+					       PBL, pbl);
 	}
 
 	return 0;
@@ -233,15 +205,13 @@ static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
 
 static int xgbe_config_osp_mode(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, OSP,
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, OSP,
 				       pdata->tx_osp_mode);
 	}
 
@@ -292,15 +262,13 @@ static int xgbe_config_tx_threshold(struct xgbe_prv_data *pdata,
 
 static int xgbe_config_rx_coalesce(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RIWT, RWT,
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RIWT, RWT,
 				       pdata->rx_riwt);
 	}
 
@@ -314,44 +282,38 @@ static int xgbe_config_tx_coalesce(struct xgbe_prv_data *pdata)
 
 static void xgbe_config_rx_buffer_size(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, RBSZ,
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, RBSZ,
 				       pdata->rx_buf_size);
 	}
 }
 
 static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, TSE, 1);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, 1);
 	}
 }
 
 static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, SPH, 1);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1);
 	}
 
 	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
@@ -651,8 +613,9 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 		XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM,
 				   pdata->channel_irq_mode);
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
+
 		/* Clear all the interrupts which are set */
 		dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
 		XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
@@ -1497,26 +1460,37 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel)
 static void xgbe_update_tstamp_addend(struct xgbe_prv_data *pdata,
 				      unsigned int addend)
 {
+	unsigned int count = 10000;
+
 	/* Set the addend register value and tell the device */
 	XGMAC_IOWRITE(pdata, MAC_TSAR, addend);
 	XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSADDREG, 1);
 
 	/* Wait for addend update to complete */
-	while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG))
+	while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG))
 		udelay(5);
+
+	if (!count)
+		netdev_err(pdata->netdev,
+			   "timed out updating timestamp addend register\n");
 }
 
 static void xgbe_set_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec,
 				 unsigned int nsec)
 {
+	unsigned int count = 10000;
+
 	/* Set the time values and tell the device */
 	XGMAC_IOWRITE(pdata, MAC_STSUR, sec);
 	XGMAC_IOWRITE(pdata, MAC_STNUR, nsec);
 	XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSINIT, 1);
 
 	/* Wait for time update to complete */
-	while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT))
+	while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT))
 		udelay(5);
+
+	if (!count)
+		netdev_err(pdata->netdev, "timed out initializing timestamp\n");
 }
 
 static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata)
@@ -2140,37 +2114,38 @@ static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
 
 static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata)
 {
+	unsigned int sbmr;
+
+	sbmr = XGMAC_IOREAD(pdata, DMA_SBMR);
+
 	/* Set enhanced addressing mode */
-	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, EAME, 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, EAME, 1);
 
 	/* Set the System Bus mode */
-	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, UNDEF, 1);
-	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, BLEN_256, 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, UNDEF, 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, BLEN, pdata->blen >> 2);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, AAL, pdata->aal);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, RD_OSR_LMT, pdata->rd_osr_limit - 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, WR_OSR_LMT, pdata->wr_osr_limit - 1);
+
+	XGMAC_IOWRITE(pdata, DMA_SBMR, sbmr);
+
+	/* Set descriptor fetching threshold */
+	if (pdata->vdata->tx_desc_prefetch)
+		XGMAC_IOWRITE_BITS(pdata, DMA_TXEDMACR, TDPS,
+				   pdata->vdata->tx_desc_prefetch);
+
+	if (pdata->vdata->rx_desc_prefetch)
+		XGMAC_IOWRITE_BITS(pdata, DMA_RXEDMACR, RDPS,
+				   pdata->vdata->rx_desc_prefetch);
 }
 
 static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata)
 {
-	unsigned int arcache, awcache;
-
-	arcache = 0;
-	XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, pdata->arcache);
-	XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, pdata->axdomain);
-	XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, pdata->arcache);
-	XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, pdata->axdomain);
-	XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, pdata->arcache);
-	XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, pdata->axdomain);
-	XGMAC_IOWRITE(pdata, DMA_AXIARCR, arcache);
-
-	awcache = 0;
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, pdata->awcache);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, pdata->axdomain);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, pdata->awcache);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, pdata->axdomain);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, pdata->awcache);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, pdata->axdomain);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, pdata->awcache);
-	XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, pdata->axdomain);
-	XGMAC_IOWRITE(pdata, DMA_AXIAWCR, awcache);
+	XGMAC_IOWRITE(pdata, DMA_AXIARCR, pdata->arcr);
+	XGMAC_IOWRITE(pdata, DMA_AXIAWCR, pdata->awcr);
+	if (pdata->awarcr)
+		XGMAC_IOWRITE(pdata, DMA_AXIAWARCR, pdata->awarcr);
 }
 
 static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
@@ -3202,16 +3177,14 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
 
 static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Enable each Tx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1);
 	}
 
 	/* Enable each Tx queue */
@@ -3225,7 +3198,6 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
 
 static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Prepare for Tx DMA channel stop */
@@ -3240,12 +3212,11 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
 		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
 
 	/* Disable each Tx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
 	}
 }
 
@@ -3277,16 +3248,14 @@ static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata,
 
 static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int reg_val, i;
 
 	/* Enable each Rx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1);
 	}
 
 	/* Enable each Rx queue */
@@ -3304,7 +3273,6 @@ static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
 
 static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Disable MAC Rx */
@@ -3321,27 +3289,24 @@ static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
 	XGMAC_IOWRITE(pdata, MAC_RQC0R, 0);
 
 	/* Disable each Rx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0);
 	}
 }
 
 static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Enable each Tx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1);
 	}
 
 	/* Enable MAC Tx */
@@ -3350,7 +3315,6 @@ static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
 
 static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Prepare for Tx DMA channel stop */
@@ -3361,42 +3325,37 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
 	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
 
 	/* Disable each Tx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->tx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
 	}
 }
 
 static void xgbe_powerup_rx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Enable each Rx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1);
 	}
 }
 
 static void xgbe_powerdown_rx(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
 	/* Disable each Rx DMA channel */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->rx_ring)
+	for (i = 0; i < pdata->channel_count; i++) {
+		if (!pdata->channel[i]->rx_ring)
 			break;
 
-		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+		XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0);
 	}
 }
 
@@ -3420,9 +3379,7 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
 	xgbe_config_dma_bus(pdata);
 	xgbe_config_dma_cache(pdata);
 	xgbe_config_osp_mode(pdata);
-	xgbe_config_pblx8(pdata);
-	xgbe_config_tx_pbl_val(pdata);
-	xgbe_config_rx_pbl_val(pdata);
+	xgbe_config_pbl_val(pdata);
 	xgbe_config_rx_coalesce(pdata);
 	xgbe_config_tx_coalesce(pdata);
 	xgbe_config_rx_buffer_size(pdata);
@@ -3550,13 +3507,6 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	/* For TX DMA Operating on Second Frame config */
 	hw_if->config_osp_mode = xgbe_config_osp_mode;
 
-	/* For RX and TX PBL config */
-	hw_if->config_rx_pbl_val = xgbe_config_rx_pbl_val;
-	hw_if->get_rx_pbl_val = xgbe_get_rx_pbl_val;
-	hw_if->config_tx_pbl_val = xgbe_config_tx_pbl_val;
-	hw_if->get_tx_pbl_val = xgbe_get_tx_pbl_val;
-	hw_if->config_pblx8 = xgbe_config_pblx8;
-
 	/* For MMC statistics support */
 	hw_if->tx_mmc_int = xgbe_tx_mmc_int;
 	hw_if->rx_mmc_int = xgbe_rx_mmc_int;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index a934bd5d0507..ecef3ee87b17 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -158,81 +158,106 @@ static int xgbe_one_poll(struct napi_struct *, int);
 static int xgbe_all_poll(struct napi_struct *, int);
 static void xgbe_stop(struct xgbe_prv_data *);
 
+static void *xgbe_alloc_node(size_t size, int node)
+{
+	void *mem;
+
+	mem = kzalloc_node(size, GFP_KERNEL, node);
+	if (!mem)
+		mem = kzalloc(size, GFP_KERNEL);
+
+	return mem;
+}
+
+static void xgbe_free_channels(struct xgbe_prv_data *pdata)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pdata->channel); i++) {
+		if (!pdata->channel[i])
+			continue;
+
+		kfree(pdata->channel[i]->rx_ring);
+		kfree(pdata->channel[i]->tx_ring);
+		kfree(pdata->channel[i]);
+
+		pdata->channel[i] = NULL;
+	}
+
+	pdata->channel_count = 0;
+}
+
 static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel_mem, *channel;
-	struct xgbe_ring *tx_ring, *rx_ring;
+	struct xgbe_channel *channel;
+	struct xgbe_ring *ring;
 	unsigned int count, i;
-	int ret = -ENOMEM;
+	unsigned int cpu;
+	int node;
 
 	count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
+	for (i = 0; i < count; i++) {
+		/* Attempt to use a CPU on the node the device is on */
+		cpu = cpumask_local_spread(i, dev_to_node(pdata->dev));
 
-	channel_mem = kcalloc(count, sizeof(struct xgbe_channel), GFP_KERNEL);
-	if (!channel_mem)
-		goto err_channel;
-
-	tx_ring = kcalloc(pdata->tx_ring_count, sizeof(struct xgbe_ring),
-			  GFP_KERNEL);
-	if (!tx_ring)
-		goto err_tx_ring;
+		/* Set the allocation node based on the returned CPU */
+		node = cpu_to_node(cpu);
 
-	rx_ring = kcalloc(pdata->rx_ring_count, sizeof(struct xgbe_ring),
-			  GFP_KERNEL);
-	if (!rx_ring)
-		goto err_rx_ring;
+		channel = xgbe_alloc_node(sizeof(*channel), node);
+		if (!channel)
+			goto err_mem;
+		pdata->channel[i] = channel;
 
-	for (i = 0, channel = channel_mem; i < count; i++, channel++) {
 		snprintf(channel->name, sizeof(channel->name), "channel-%u", i);
 		channel->pdata = pdata;
 		channel->queue_index = i;
 		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
 				    (DMA_CH_INC * i);
+		channel->node = node;
+		cpumask_set_cpu(cpu, &channel->affinity_mask);
 
 		if (pdata->per_channel_irq)
 			channel->dma_irq = pdata->channel_irq[i];
 
 		if (i < pdata->tx_ring_count) {
-			spin_lock_init(&tx_ring->lock);
-			channel->tx_ring = tx_ring++;
+			ring = xgbe_alloc_node(sizeof(*ring), node);
+			if (!ring)
+				goto err_mem;
+
+			spin_lock_init(&ring->lock);
+			ring->node = node;
+
+			channel->tx_ring = ring;
 		}
 
 		if (i < pdata->rx_ring_count) {
-			spin_lock_init(&rx_ring->lock);
-			channel->rx_ring = rx_ring++;
+			ring = xgbe_alloc_node(sizeof(*ring), node);
+			if (!ring)
+				goto err_mem;
+
+			spin_lock_init(&ring->lock);
+			ring->node = node;
+
+			channel->rx_ring = ring;
 		}
 
 		netif_dbg(pdata, drv, pdata->netdev,
+			  "%s: cpu=%u, node=%d\n", channel->name, cpu, node);
+
+		netif_dbg(pdata, drv, pdata->netdev,
 			  "%s: dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n",
 			  channel->name, channel->dma_regs, channel->dma_irq,
 			  channel->tx_ring, channel->rx_ring);
 	}
 
-	pdata->channel = channel_mem;
 	pdata->channel_count = count;
 
 	return 0;
 
-err_rx_ring:
-	kfree(tx_ring);
-
-err_tx_ring:
-	kfree(channel_mem);
-
-err_channel:
-	return ret;
-}
-
-static void xgbe_free_channels(struct xgbe_prv_data *pdata)
-{
-	if (!pdata->channel)
-		return;
-
-	kfree(pdata->channel->rx_ring);
-	kfree(pdata->channel->tx_ring);
-	kfree(pdata->channel);
+err_mem:
+	xgbe_free_channels(pdata);
 
-	pdata->channel = NULL;
-	pdata->channel_count = 0;
+	return -ENOMEM;
 }
 
 static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring)
@@ -301,12 +326,10 @@ static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata,
 
 static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++)
-		xgbe_enable_rx_tx_int(pdata, channel);
+	for (i = 0; i < pdata->channel_count; i++)
+		xgbe_enable_rx_tx_int(pdata, pdata->channel[i]);
 }
 
 static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
@@ -329,12 +352,10 @@ static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
 
 static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_channel *channel;
 	unsigned int i;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++)
-		xgbe_disable_rx_tx_int(pdata, channel);
+	for (i = 0; i < pdata->channel_count; i++)
+		xgbe_disable_rx_tx_int(pdata, pdata->channel[i]);
 }
 
 static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period,
@@ -382,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
 	return false;
 }
 
-static irqreturn_t xgbe_ecc_isr(int irq, void *data)
+static void xgbe_ecc_isr_task(unsigned long data)
 {
-	struct xgbe_prv_data *pdata = data;
+	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
 	unsigned int ecc_isr;
 	bool stop = false;
 
@@ -435,12 +456,26 @@ out:
 	/* Clear all ECC interrupts */
 	XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
 
-	return IRQ_HANDLED;
+	/* Reissue interrupt if status is not clear */
+	if (pdata->vdata->irq_reissue_support)
+		XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 1);
 }
 
-static irqreturn_t xgbe_isr(int irq, void *data)
+static irqreturn_t xgbe_ecc_isr(int irq, void *data)
 {
 	struct xgbe_prv_data *pdata = data;
+
+	if (pdata->isr_as_tasklet)
+		tasklet_schedule(&pdata->tasklet_ecc);
+	else
+		xgbe_ecc_isr_task((unsigned long)pdata);
+
+	return IRQ_HANDLED;
+}
+
+static void xgbe_isr_task(unsigned long data)
+{
+	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_channel *channel;
 	unsigned int dma_isr, dma_ch_isr;
@@ -461,7 +496,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 		if (!(dma_isr & (1 << i)))
 			continue;
 
-		channel = pdata->channel + i;
+		channel = pdata->channel[i];
 
 		dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
 		netif_dbg(pdata, intr, pdata->netdev, "DMA_CH%u_ISR=%#010x\n",
@@ -543,15 +578,36 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 isr_done:
 	/* If there is not a separate AN irq, handle it here */
 	if (pdata->dev_irq == pdata->an_irq)
-		pdata->phy_if.an_isr(irq, pdata);
+		pdata->phy_if.an_isr(pdata);
 
 	/* If there is not a separate ECC irq, handle it here */
 	if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
-		xgbe_ecc_isr(irq, pdata);
+		xgbe_ecc_isr_task((unsigned long)pdata);
 
 	/* If there is not a separate I2C irq, handle it here */
 	if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
-		pdata->i2c_if.i2c_isr(irq, pdata);
+		pdata->i2c_if.i2c_isr(pdata);
+
+	/* Reissue interrupt if status is not clear */
+	if (pdata->vdata->irq_reissue_support) {
+		unsigned int reissue_mask;
+
+		reissue_mask = 1 << 0;
+		if (!pdata->per_channel_irq)
+			reissue_mask |= 0xffff < 4;
+
+		XP_IOWRITE(pdata, XP_INT_REISSUE_EN, reissue_mask);
+	}
+}
+
+static irqreturn_t xgbe_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = data;
+
+	if (pdata->isr_as_tasklet)
+		tasklet_schedule(&pdata->tasklet_dev);
+	else
+		xgbe_isr_task((unsigned long)pdata);
 
 	return IRQ_HANDLED;
 }
@@ -640,8 +696,8 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata)
 	setup_timer(&pdata->service_timer, xgbe_service_timer,
 		    (unsigned long)pdata);
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		if (!channel->tx_ring)
 			break;
 
@@ -662,8 +718,8 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata)
 
 	del_timer_sync(&pdata->service_timer);
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		if (!channel->tx_ring)
 			break;
 
@@ -781,8 +837,8 @@ static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
 	unsigned int i;
 
 	if (pdata->per_channel_irq) {
-		channel = pdata->channel;
-		for (i = 0; i < pdata->channel_count; i++, channel++) {
+		for (i = 0; i < pdata->channel_count; i++) {
+			channel = pdata->channel[i];
 			if (add)
 				netif_napi_add(pdata->netdev, &channel->napi,
 					       xgbe_one_poll, NAPI_POLL_WEIGHT);
@@ -804,8 +860,8 @@ static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del)
 	unsigned int i;
 
 	if (pdata->per_channel_irq) {
-		channel = pdata->channel;
-		for (i = 0; i < pdata->channel_count; i++, channel++) {
+		for (i = 0; i < pdata->channel_count; i++) {
+			channel = pdata->channel[i];
 			napi_disable(&channel->napi);
 
 			if (del)
@@ -826,6 +882,10 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 	unsigned int i;
 	int ret;
 
+	tasklet_init(&pdata->tasklet_dev, xgbe_isr_task, (unsigned long)pdata);
+	tasklet_init(&pdata->tasklet_ecc, xgbe_ecc_isr_task,
+		     (unsigned long)pdata);
+
 	ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
 			       netdev->name, pdata);
 	if (ret) {
@@ -847,8 +907,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 	if (!pdata->per_channel_irq)
 		return 0;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		snprintf(channel->dma_irq_name,
 			 sizeof(channel->dma_irq_name) - 1,
 			 "%s-TxRx-%u", netdev_name(netdev),
@@ -862,14 +922,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 				     channel->dma_irq);
 			goto err_dma_irq;
 		}
+
+		irq_set_affinity_hint(channel->dma_irq,
+				      &channel->affinity_mask);
 	}
 
 	return 0;
 
 err_dma_irq:
 	/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
-	for (i--, channel--; i < pdata->channel_count; i--, channel--)
+	for (i--; i < pdata->channel_count; i--) {
+		channel = pdata->channel[i];
+
+		irq_set_affinity_hint(channel->dma_irq, NULL);
 		devm_free_irq(pdata->dev, channel->dma_irq, channel);
+	}
 
 	if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
 		devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
@@ -893,9 +960,12 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
 	if (!pdata->per_channel_irq)
 		return;
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++)
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
+
+		irq_set_affinity_hint(channel->dma_irq, NULL);
 		devm_free_irq(pdata->dev, channel->dma_irq, channel);
+	}
 }
 
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata)
@@ -930,16 +1000,14 @@ void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata)
 static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
-	struct xgbe_channel *channel;
 	struct xgbe_ring *ring;
 	struct xgbe_ring_data *rdata;
 	unsigned int i, j;
 
 	DBGPR("-->xgbe_free_tx_data\n");
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		ring = channel->tx_ring;
+	for (i = 0; i < pdata->channel_count; i++) {
+		ring = pdata->channel[i]->tx_ring;
 		if (!ring)
 			break;
 
@@ -955,16 +1023,14 @@ static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
 static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
-	struct xgbe_channel *channel;
 	struct xgbe_ring *ring;
 	struct xgbe_ring_data *rdata;
 	unsigned int i, j;
 
 	DBGPR("-->xgbe_free_rx_data\n");
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		ring = channel->rx_ring;
+	for (i = 0; i < pdata->channel_count; i++) {
+		ring = pdata->channel[i]->rx_ring;
 		if (!ring)
 			break;
 
@@ -1140,8 +1206,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
 	hw_if->exit(pdata);
 
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
+	for (i = 0; i < pdata->channel_count; i++) {
+		channel = pdata->channel[i];
 		if (!channel->tx_ring)
 			continue;
 
@@ -1212,6 +1278,10 @@ static void xgbe_tx_tstamp(struct work_struct *work)
 	u64 nsec;
 	unsigned long flags;
 
+	spin_lock_irqsave(&pdata->tstamp_lock, flags);
+	if (!pdata->tx_tstamp_skb)
+		goto unlock;
+
 	if (pdata->tx_tstamp) {
 		nsec = timecounter_cyc2time(&pdata->tstamp_tc,
 					    pdata->tx_tstamp);
@@ -1223,8 +1293,9 @@ static void xgbe_tx_tstamp(struct work_struct *work)
 
 	dev_kfree_skb_any(pdata->tx_tstamp_skb);
 
-	spin_lock_irqsave(&pdata->tstamp_lock, flags);
 	pdata->tx_tstamp_skb = NULL;
+
+unlock:
 	spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
 }
 
@@ -1623,7 +1694,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len);
 
-	channel = pdata->channel + skb->queue_mapping;
+	channel = pdata->channel[skb->queue_mapping];
 	txq = netdev_get_tx_queue(netdev, channel->queue_index);
 	ring = channel->tx_ring;
 	packet = &ring->packet_data;
@@ -1833,9 +1904,10 @@ static void xgbe_poll_controller(struct net_device *netdev)
 	DBGPR("-->xgbe_poll_controller\n");
 
 	if (pdata->per_channel_irq) {
-		channel = pdata->channel;
-		for (i = 0; i < pdata->channel_count; i++, channel++)
+		for (i = 0; i < pdata->channel_count; i++) {
+			channel = pdata->channel[i];
 			xgbe_dma_isr(channel->dma_irq, channel);
+		}
 	} else {
 		disable_irq(pdata->dev_irq);
 		xgbe_isr(pdata->dev_irq, pdata);
@@ -2328,8 +2400,9 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
 	do {
 		last_processed = processed;
 
-		channel = pdata->channel;
-		for (i = 0; i < pdata->channel_count; i++, channel++) {
+		for (i = 0; i < pdata->channel_count; i++) {
+			channel = pdata->channel[i];
+
 			/* Cleanup Tx ring first */
 			xgbe_tx_poll(channel);
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 920566a3a599..67a2e52ad25d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -247,7 +247,7 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
 
 	if (pause->autoneg && (pdata->phy.autoneg != AUTONEG_ENABLE)) {
 		netdev_err(netdev,
-			   "autoneg disabled, pause autoneg not avialable\n");
+			   "autoneg disabled, pause autoneg not available\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index 417bdb5982a9..4d9062d35930 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -274,13 +274,16 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata,
 		XI2C_IOREAD(pdata, IC_CLR_STOP_DET);
 }
 
-static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+static void xgbe_i2c_isr_task(unsigned long data)
 {
 	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
 	struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
 	unsigned int isr;
 
 	isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT);
+	if (!isr)
+		goto reissue_check;
+
 	netif_dbg(pdata, intr, pdata->netdev,
 		  "I2C interrupt received: status=%#010x\n", isr);
 
@@ -308,6 +311,21 @@ out:
 	if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET))
 		complete(&pdata->i2c_complete);
 
+reissue_check:
+	/* Reissue interrupt if status is not clear */
+	if (pdata->vdata->irq_reissue_support)
+		XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 2);
+}
+
+static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+	if (pdata->isr_as_tasklet)
+		tasklet_schedule(&pdata->tasklet_i2c);
+	else
+		xgbe_i2c_isr_task((unsigned long)pdata);
+
 	return IRQ_HANDLED;
 }
 
@@ -349,12 +367,11 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr)
 	XI2C_IOWRITE(pdata, IC_TAR, addr);
 }
 
-static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata)
+static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata)
 {
-	if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT))
-		return IRQ_HANDLED;
+	xgbe_i2c_isr_task((unsigned long)pdata);
 
-	return xgbe_i2c_isr(irq, pdata);
+	return IRQ_HANDLED;
 }
 
 static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op)
@@ -445,6 +462,9 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
 
 	/* If we have a separate I2C irq, enable it */
 	if (pdata->dev_irq != pdata->i2c_irq) {
+		tasklet_init(&pdata->tasklet_i2c, xgbe_i2c_isr_task,
+			     (unsigned long)pdata);
+
 		ret = devm_request_irq(pdata->dev, pdata->i2c_irq,
 				       xgbe_i2c_isr, 0, pdata->i2c_name,
 				       pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 17ac8f9a51a0..500147d9e3c8 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -140,14 +140,16 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata)
 {
 	DBGPR("-->xgbe_default_config\n");
 
-	pdata->pblx8 = DMA_PBL_X8_ENABLE;
+	pdata->blen = DMA_SBMR_BLEN_64;
+	pdata->pbl = DMA_PBL_128;
+	pdata->aal = 1;
+	pdata->rd_osr_limit = 8;
+	pdata->wr_osr_limit = 8;
 	pdata->tx_sf_mode = MTL_TSF_ENABLE;
 	pdata->tx_threshold = MTL_TX_THRESHOLD_64;
-	pdata->tx_pbl = DMA_PBL_16;
 	pdata->tx_osp_mode = DMA_OSP_ENABLE;
 	pdata->rx_sf_mode = MTL_RSF_DISABLE;
 	pdata->rx_threshold = MTL_RX_THRESHOLD_64;
-	pdata->rx_pbl = DMA_PBL_16;
 	pdata->pause_autoneg = 1;
 	pdata->tx_pause = 1;
 	pdata->rx_pause = 1;
@@ -277,7 +279,11 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
 	pdata->desc_ded_period = jiffies;
 
 	/* Issue software reset to device */
-	pdata->hw_if.exit(pdata);
+	ret = pdata->hw_if.exit(pdata);
+	if (ret) {
+		dev_err(dev, "software reset failed\n");
+		return ret;
+	}
 
 	/* Set default configuration data */
 	xgbe_default_config(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index b672d9249539..80684914dd8a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -665,6 +665,10 @@ static void xgbe_an37_isr(struct xgbe_prv_data *pdata)
 	} else {
 		/* Enable AN interrupts */
 		xgbe_an37_enable_interrupts(pdata);
+
+		/* Reissue interrupt if status is not clear */
+		if (pdata->vdata->irq_reissue_support)
+			XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
 	}
 }
 
@@ -684,10 +688,14 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata)
 	} else {
 		/* Enable AN interrupts */
 		xgbe_an73_enable_interrupts(pdata);
+
+		/* Reissue interrupt if status is not clear */
+		if (pdata->vdata->irq_reissue_support)
+			XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
 	}
 }
 
-static irqreturn_t xgbe_an_isr(int irq, void *data)
+static void xgbe_an_isr_task(unsigned long data)
 {
 	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
 
@@ -705,13 +713,25 @@ static irqreturn_t xgbe_an_isr(int irq, void *data)
 	default:
 		break;
 	}
+}
+
+static irqreturn_t xgbe_an_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+	if (pdata->isr_as_tasklet)
+		tasklet_schedule(&pdata->tasklet_an);
+	else
+		xgbe_an_isr_task((unsigned long)pdata);
 
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata)
+static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata)
 {
-	return xgbe_an_isr(irq, pdata);
+	xgbe_an_isr_task((unsigned long)pdata);
+
+	return IRQ_HANDLED;
 }
 
 static void xgbe_an_irq_work(struct work_struct *work)
@@ -915,6 +935,10 @@ static void xgbe_an_state_machine(struct work_struct *work)
 		break;
 	}
 
+	/* Reissue interrupt if status is not clear */
+	if (pdata->vdata->irq_reissue_support)
+		XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
+
 	mutex_unlock(&pdata->an_mutex);
 }
 
@@ -1379,6 +1403,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
 
 	/* If we have a separate AN irq, enable it */
 	if (pdata->dev_irq != pdata->an_irq) {
+		tasklet_init(&pdata->tasklet_an, xgbe_an_isr_task,
+			     (unsigned long)pdata);
+
 		ret = devm_request_irq(pdata->dev, pdata->an_irq,
 				       xgbe_an_isr, 0, pdata->an_name,
 				       pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 38392a520725..1e56ad7bd9a5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -139,6 +139,7 @@ static int xgbe_config_multi_msi(struct xgbe_prv_data *pdata)
 		return ret;
 	}
 
+	pdata->isr_as_tasklet = 1;
 	pdata->irq_count = ret;
 
 	pdata->dev_irq = pci_irq_vector(pdata->pcidev, 0);
@@ -175,6 +176,7 @@ static int xgbe_config_irqs(struct xgbe_prv_data *pdata)
 		return ret;
 	}
 
+	pdata->isr_as_tasklet = pdata->pcidev->msi_enabled ? 1 : 0;
 	pdata->irq_count = 1;
 	pdata->channel_irq_count = 1;
 
@@ -325,9 +327,9 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Set the DMA coherency values */
 	pdata->coherent = 1;
-	pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
-	pdata->arcache = XGBE_DMA_OS_ARCACHE;
-	pdata->awcache = XGBE_DMA_OS_AWCACHE;
+	pdata->arcr = XGBE_DMA_PCI_ARCR;
+	pdata->awcr = XGBE_DMA_PCI_AWCR;
+	pdata->awarcr = XGBE_DMA_PCI_AWARCR;
 
 	/* Set the maximum channels and queues */
 	reg = XP_IOREAD(pdata, XP_PROP_1);
@@ -445,6 +447,9 @@ static const struct xgbe_version_data xgbe_v2a = {
 	.tx_tstamp_workaround		= 1,
 	.ecc_support			= 1,
 	.i2c_support			= 1,
+	.irq_reissue_support		= 1,
+	.tx_desc_prefetch		= 5,
+	.rx_desc_prefetch		= 5,
 };
 
 static const struct xgbe_version_data xgbe_v2b = {
@@ -456,6 +461,9 @@ static const struct xgbe_version_data xgbe_v2b = {
 	.tx_tstamp_workaround		= 1,
 	.ecc_support			= 1,
 	.i2c_support			= 1,
+	.irq_reissue_support		= 1,
+	.tx_desc_prefetch		= 5,
+	.rx_desc_prefetch		= 5,
 };
 
 static const struct pci_device_id xgbe_pci_table[] = {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index e707c49cc55a..04b5c149caca 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -711,23 +711,39 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
 
+	if (!phy_data->sfp_mod_absent && !phy_data->sfp_changed)
+		return;
+
+	pdata->phy.supported &= ~SUPPORTED_Autoneg;
+	pdata->phy.supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+	pdata->phy.supported &= ~SUPPORTED_TP;
+	pdata->phy.supported &= ~SUPPORTED_FIBRE;
+	pdata->phy.supported &= ~SUPPORTED_100baseT_Full;
+	pdata->phy.supported &= ~SUPPORTED_1000baseT_Full;
+	pdata->phy.supported &= ~SUPPORTED_10000baseT_Full;
+
 	if (phy_data->sfp_mod_absent) {
 		pdata->phy.speed = SPEED_UNKNOWN;
 		pdata->phy.duplex = DUPLEX_UNKNOWN;
 		pdata->phy.autoneg = AUTONEG_ENABLE;
+		pdata->phy.pause_autoneg = AUTONEG_ENABLE;
+
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_TP;
+		pdata->phy.supported |= SUPPORTED_FIBRE;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
+			pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+
 		pdata->phy.advertising = pdata->phy.supported;
 
 		return;
 	}
 
-	pdata->phy.advertising &= ~ADVERTISED_Autoneg;
-	pdata->phy.advertising &= ~ADVERTISED_TP;
-	pdata->phy.advertising &= ~ADVERTISED_FIBRE;
-	pdata->phy.advertising &= ~ADVERTISED_100baseT_Full;
-	pdata->phy.advertising &= ~ADVERTISED_1000baseT_Full;
-	pdata->phy.advertising &= ~ADVERTISED_10000baseT_Full;
-	pdata->phy.advertising &= ~ADVERTISED_10000baseR_FEC;
-
 	switch (phy_data->sfp_base) {
 	case XGBE_SFP_BASE_1000_T:
 	case XGBE_SFP_BASE_1000_SX:
@@ -736,17 +752,25 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
 		pdata->phy.speed = SPEED_UNKNOWN;
 		pdata->phy.duplex = DUPLEX_UNKNOWN;
 		pdata->phy.autoneg = AUTONEG_ENABLE;
-		pdata->phy.advertising |= ADVERTISED_Autoneg;
+		pdata->phy.pause_autoneg = AUTONEG_ENABLE;
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 		break;
 	case XGBE_SFP_BASE_10000_SR:
 	case XGBE_SFP_BASE_10000_LR:
 	case XGBE_SFP_BASE_10000_LRM:
 	case XGBE_SFP_BASE_10000_ER:
 	case XGBE_SFP_BASE_10000_CR:
-	default:
 		pdata->phy.speed = SPEED_10000;
 		pdata->phy.duplex = DUPLEX_FULL;
 		pdata->phy.autoneg = AUTONEG_DISABLE;
+		pdata->phy.pause_autoneg = AUTONEG_DISABLE;
+		break;
+	default:
+		pdata->phy.speed = SPEED_UNKNOWN;
+		pdata->phy.duplex = DUPLEX_UNKNOWN;
+		pdata->phy.autoneg = AUTONEG_DISABLE;
+		pdata->phy.pause_autoneg = AUTONEG_DISABLE;
 		break;
 	}
 
@@ -754,36 +778,38 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
 	case XGBE_SFP_BASE_1000_T:
 	case XGBE_SFP_BASE_1000_CX:
 	case XGBE_SFP_BASE_10000_CR:
-		pdata->phy.advertising |= ADVERTISED_TP;
+		pdata->phy.supported |= SUPPORTED_TP;
 		break;
 	default:
-		pdata->phy.advertising |= ADVERTISED_FIBRE;
+		pdata->phy.supported |= SUPPORTED_FIBRE;
 	}
 
 	switch (phy_data->sfp_speed) {
 	case XGBE_SFP_SPEED_100_1000:
 		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
-			pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
 		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
-			pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
 		break;
 	case XGBE_SFP_SPEED_1000:
 		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
-			pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
 		break;
 	case XGBE_SFP_SPEED_10000:
 		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
-			pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+			pdata->phy.supported |= SUPPORTED_10000baseT_Full;
 		break;
 	default:
 		/* Choose the fastest supported speed */
 		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
-			pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+			pdata->phy.supported |= SUPPORTED_10000baseT_Full;
 		else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
-			pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
 		else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
-			pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
 	}
+
+	pdata->phy.advertising = pdata->phy.supported;
 }
 
 static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
@@ -1095,7 +1121,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
 
 	ret = xgbe_phy_sfp_get_mux(pdata);
 	if (ret) {
-		netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+		dev_err_once(pdata->dev, "%s: I2C error setting SFP MUX\n",
+			     netdev_name(pdata->netdev));
 		return ret;
 	}
 
@@ -1105,7 +1132,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
 				&eeprom_addr, sizeof(eeprom_addr),
 				&sfp_eeprom, sizeof(sfp_eeprom));
 	if (ret) {
-		netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n");
+		dev_err_once(pdata->dev, "%s: I2C error reading SFP EEPROM\n",
+			     netdev_name(pdata->netdev));
 		goto put;
 	}
 
@@ -1164,7 +1192,8 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
 				&gpio_reg, sizeof(gpio_reg),
 				gpio_ports, sizeof(gpio_ports));
 	if (ret) {
-		netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n");
+		dev_err_once(pdata->dev, "%s: I2C error reading SFP GPIOs\n",
+			     netdev_name(pdata->netdev));
 		return;
 	}
 
@@ -1694,19 +1723,25 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
 	xgbe_phy_put_comm_ownership(pdata);
 }
 
-static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
+					unsigned int cmd, unsigned int sub_cmd)
 {
-	if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
-		return;
+	unsigned int s0 = 0;
+	unsigned int wait;
 
 	/* Log if a previous command did not complete */
-	netif_dbg(pdata, link, pdata->netdev,
-		  "firmware mailbox not ready for command\n");
-}
+	if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+		netif_dbg(pdata, link, pdata->netdev,
+			  "firmware mailbox not ready for command\n");
 
-static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
-{
-	unsigned int wait;
+	/* Construct the command */
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, sub_cmd);
+
+	/* Issue the command */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
 
 	/* Wait for command to complete */
 	wait = XGBE_RATECHANGE_COUNT;
@@ -1723,21 +1758,8 @@ static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
 
 static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
 {
-	unsigned int s0;
-
-	xgbe_phy_start_ratechange(pdata);
-
 	/* Receiver Reset Cycle */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	xgbe_phy_perform_ratechange(pdata, 5, 0);
 
 	netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n");
 }
@@ -1746,14 +1768,8 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
 
-	xgbe_phy_start_ratechange(pdata);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	/* Power off */
+	xgbe_phy_perform_ratechange(pdata, 0, 0);
 
 	phy_data->cur_mode = XGBE_MODE_UNKNOWN;
 
@@ -1763,33 +1779,21 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
 static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
 	/* 10G/SFI */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3);
 	if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) {
-		XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+		xgbe_phy_perform_ratechange(pdata, 3, 0);
 	} else {
 		if (phy_data->sfp_cable_len <= 1)
-			XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+			xgbe_phy_perform_ratechange(pdata, 3, 1);
 		else if (phy_data->sfp_cable_len <= 3)
-			XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+			xgbe_phy_perform_ratechange(pdata, 3, 2);
 		else
-			XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+			xgbe_phy_perform_ratechange(pdata, 3, 3);
 	}
 
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
-
 	phy_data->cur_mode = XGBE_MODE_SFI;
 
 	netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n");
@@ -1798,23 +1802,11 @@ static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
 static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
 	/* 1G/X */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	xgbe_phy_perform_ratechange(pdata, 1, 3);
 
 	phy_data->cur_mode = XGBE_MODE_X;
 
@@ -1824,23 +1816,11 @@ static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
 static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
 	/* 1G/SGMII */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	xgbe_phy_perform_ratechange(pdata, 1, 2);
 
 	phy_data->cur_mode = XGBE_MODE_SGMII_1000;
 
@@ -1850,23 +1830,11 @@ static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
 static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
-	/* 1G/SGMII */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	/* 100M/SGMII */
+	xgbe_phy_perform_ratechange(pdata, 1, 1);
 
 	phy_data->cur_mode = XGBE_MODE_SGMII_100;
 
@@ -1876,23 +1844,11 @@ static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
 static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
 	/* 10G/KR */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	xgbe_phy_perform_ratechange(pdata, 4, 0);
 
 	phy_data->cur_mode = XGBE_MODE_KR;
 
@@ -1902,23 +1858,11 @@ static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
 static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
 	/* 2.5G/KX */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	xgbe_phy_perform_ratechange(pdata, 2, 0);
 
 	phy_data->cur_mode = XGBE_MODE_KX_2500;
 
@@ -1928,23 +1872,11 @@ static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
 static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_phy_data *phy_data = pdata->phy_data;
-	unsigned int s0;
 
 	xgbe_phy_set_redrv_mode(pdata);
 
-	xgbe_phy_start_ratechange(pdata);
-
 	/* 1G/KX */
-	s0 = 0;
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
-
-	/* Call FW to make the change */
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-	xgbe_phy_complete_ratechange(pdata);
+	xgbe_phy_perform_ratechange(pdata, 1, 3);
 
 	phy_data->cur_mode = XGBE_MODE_KX_1000;
 
@@ -2037,6 +1969,8 @@ static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data,
 		return XGBE_MODE_SGMII_100;
 	case SPEED_1000:
 		return XGBE_MODE_SGMII_1000;
+	case SPEED_2500:
+		return XGBE_MODE_KX_2500;
 	case SPEED_10000:
 		return XGBE_MODE_KR;
 	default:
@@ -2180,6 +2114,9 @@ static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
 	case XGBE_MODE_SGMII_1000:
 		return xgbe_phy_check_mode(pdata, mode,
 					   ADVERTISED_1000baseT_Full);
+	case XGBE_MODE_KX_2500:
+		return xgbe_phy_check_mode(pdata, mode,
+					   ADVERTISED_2500baseX_Full);
 	case XGBE_MODE_KR:
 		return xgbe_phy_check_mode(pdata, mode,
 					   ADVERTISED_10000baseT_Full);
@@ -2210,6 +2147,8 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
 		return xgbe_phy_check_mode(pdata, mode,
 					   ADVERTISED_1000baseT_Full);
 	case XGBE_MODE_SFI:
+		if (phy_data->sfp_mod_absent)
+			return true;
 		return xgbe_phy_check_mode(pdata, mode,
 					   ADVERTISED_10000baseT_Full);
 	default:
@@ -2287,6 +2226,8 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data,
 	case SPEED_100:
 	case SPEED_1000:
 		return true;
+	case SPEED_2500:
+		return (phy_data->port_mode == XGBE_PORT_MODE_NBASE_T);
 	case SPEED_10000:
 		return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T);
 	default:
@@ -3013,9 +2954,6 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
 		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
 			pdata->phy.supported |= SUPPORTED_10000baseT_Full;
 			phy_data->start_mode = XGBE_MODE_SFI;
-			if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
-				pdata->phy.supported |=
-					SUPPORTED_10000baseR_FEC;
 		}
 
 		phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 84d4c51cab8c..d0f3dfb88202 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -448,13 +448,11 @@ static int xgbe_platform_probe(struct platform_device *pdev)
 	}
 	pdata->coherent = (attr == DEV_DMA_COHERENT);
 	if (pdata->coherent) {
-		pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
-		pdata->arcache = XGBE_DMA_OS_ARCACHE;
-		pdata->awcache = XGBE_DMA_OS_AWCACHE;
+		pdata->arcr = XGBE_DMA_OS_ARCR;
+		pdata->awcr = XGBE_DMA_OS_AWCR;
 	} else {
-		pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
-		pdata->arcache = XGBE_DMA_SYS_ARCACHE;
-		pdata->awcache = XGBE_DMA_SYS_AWCACHE;
+		pdata->arcr = XGBE_DMA_SYS_ARCR;
+		pdata->awcr = XGBE_DMA_SYS_AWCR;
 	}
 
 	/* Set the maximum fifo amounts */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
index a533a6cc2d53..d06d260cf1e2 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
@@ -267,7 +267,7 @@ void xgbe_ptp_register(struct xgbe_prv_data *pdata)
 			 ktime_to_ns(ktime_get_real()));
 
 	/* Disable all timestamping to start */
-	XGMAC_IOWRITE(pdata, MAC_TCR, 0);
+	XGMAC_IOWRITE(pdata, MAC_TSCR, 0);
 	pdata->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
 	pdata->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index f9a24639f574..0938294f640a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -128,6 +128,7 @@
 #include <linux/net_tstamp.h>
 #include <net/dcbnl.h>
 #include <linux/completion.h>
+#include <linux/cpumask.h>
 
 #define XGBE_DRV_NAME		"amd-xgbe"
 #define XGBE_DRV_VERSION	"1.0.3"
@@ -163,14 +164,17 @@
 #define XGBE_DMA_STOP_TIMEOUT	1
 
 /* DMA cache settings - Outer sharable, write-back, write-allocate */
-#define XGBE_DMA_OS_AXDOMAIN	0x2
-#define XGBE_DMA_OS_ARCACHE	0xb
-#define XGBE_DMA_OS_AWCACHE	0xf
+#define XGBE_DMA_OS_ARCR	0x002b2b2b
+#define XGBE_DMA_OS_AWCR	0x2f2f2f2f
 
 /* DMA cache settings - System, no caches used */
-#define XGBE_DMA_SYS_AXDOMAIN	0x3
-#define XGBE_DMA_SYS_ARCACHE	0x0
-#define XGBE_DMA_SYS_AWCACHE	0x0
+#define XGBE_DMA_SYS_ARCR	0x00303030
+#define XGBE_DMA_SYS_AWCR	0x30303030
+
+/* DMA cache settings - PCI device */
+#define XGBE_DMA_PCI_ARCR	0x00000003
+#define XGBE_DMA_PCI_AWCR	0x13131313
+#define XGBE_DMA_PCI_AWARCR	0x00000313
 
 /* DMA channel interrupt modes */
 #define XGBE_IRQ_MODE_EDGE	0
@@ -412,6 +416,7 @@ struct xgbe_ring {
 	/* Page allocation for RX buffers */
 	struct xgbe_page_alloc rx_hdr_pa;
 	struct xgbe_page_alloc rx_buf_pa;
+	int node;
 
 	/* Ring index values
 	 *  cur   - Tx: index of descriptor to be used for current transfer
@@ -462,6 +467,9 @@ struct xgbe_channel {
 
 	struct xgbe_ring *tx_ring;
 	struct xgbe_ring *rx_ring;
+
+	int node;
+	cpumask_t affinity_mask;
 } ____cacheline_aligned;
 
 enum xgbe_state {
@@ -734,13 +742,6 @@ struct xgbe_hw_if {
 	/* For TX DMA Operate on Second Frame config */
 	int (*config_osp_mode)(struct xgbe_prv_data *);
 
-	/* For RX and TX PBL config */
-	int (*config_rx_pbl_val)(struct xgbe_prv_data *);
-	int (*get_rx_pbl_val)(struct xgbe_prv_data *);
-	int (*config_tx_pbl_val)(struct xgbe_prv_data *);
-	int (*get_tx_pbl_val)(struct xgbe_prv_data *);
-	int (*config_pblx8)(struct xgbe_prv_data *);
-
 	/* For MMC statistics */
 	void (*rx_mmc_int)(struct xgbe_prv_data *);
 	void (*tx_mmc_int)(struct xgbe_prv_data *);
@@ -837,7 +838,7 @@ struct xgbe_phy_if {
 	bool (*phy_valid_speed)(struct xgbe_prv_data *, int);
 
 	/* For single interrupt support */
-	irqreturn_t (*an_isr)(int, struct xgbe_prv_data *);
+	irqreturn_t (*an_isr)(struct xgbe_prv_data *);
 
 	/* PHY implementation specific services */
 	struct xgbe_phy_impl_if phy_impl;
@@ -855,7 +856,7 @@ struct xgbe_i2c_if {
 	int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *);
 
 	/* For single interrupt support */
-	irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *);
+	irqreturn_t (*i2c_isr)(struct xgbe_prv_data *);
 };
 
 struct xgbe_desc_if {
@@ -924,6 +925,9 @@ struct xgbe_version_data {
 	unsigned int tx_tstamp_workaround;
 	unsigned int ecc_support;
 	unsigned int i2c_support;
+	unsigned int irq_reissue_support;
+	unsigned int tx_desc_prefetch;
+	unsigned int rx_desc_prefetch;
 };
 
 struct xgbe_prv_data {
@@ -1001,9 +1005,9 @@ struct xgbe_prv_data {
 
 	/* AXI DMA settings */
 	unsigned int coherent;
-	unsigned int axdomain;
-	unsigned int arcache;
-	unsigned int awcache;
+	unsigned int arcr;
+	unsigned int awcr;
+	unsigned int awarcr;
 
 	/* Service routine support */
 	struct workqueue_struct *dev_workqueue;
@@ -1011,7 +1015,7 @@ struct xgbe_prv_data {
 	struct timer_list service_timer;
 
 	/* Rings for Tx/Rx on a DMA channel */
-	struct xgbe_channel *channel;
+	struct xgbe_channel *channel[XGBE_MAX_DMA_CHANNELS];
 	unsigned int tx_max_channel_count;
 	unsigned int rx_max_channel_count;
 	unsigned int channel_count;
@@ -1026,19 +1030,21 @@ struct xgbe_prv_data {
 	unsigned int rx_q_count;
 
 	/* Tx/Rx common settings */
-	unsigned int pblx8;
+	unsigned int blen;
+	unsigned int pbl;
+	unsigned int aal;
+	unsigned int rd_osr_limit;
+	unsigned int wr_osr_limit;
 
 	/* Tx settings */
 	unsigned int tx_sf_mode;
 	unsigned int tx_threshold;
-	unsigned int tx_pbl;
 	unsigned int tx_osp_mode;
 	unsigned int tx_max_fifo_size;
 
 	/* Rx settings */
 	unsigned int rx_sf_mode;
 	unsigned int rx_threshold;
-	unsigned int rx_pbl;
 	unsigned int rx_max_fifo_size;
 
 	/* Tx coalescing settings */
@@ -1159,6 +1165,12 @@ struct xgbe_prv_data {
 
 	unsigned int lpm_ctrl;		/* CTRL1 for resume */
 
+	unsigned int isr_as_tasklet;
+	struct tasklet_struct tasklet_dev;
+	struct tasklet_struct tasklet_ecc;
+	struct tasklet_struct tasklet_i2c;
+	struct tasklet_struct tasklet_an;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *xgbe_debugfs;
 
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 7e913d8331c3..8c9986f3fc01 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2252,7 +2252,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 
 	if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
 		netif_info(adapter, tx_done, adapter->netdev,
-			   "tx-skb droppted due to dma error\n");
+			   "tx-skb dropped due to dma error\n");
 		/* roll back tpd/buffer */
 		atl1c_tx_rollback(adapter, tpd, type);
 		dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 14c236e5bdb1..c12b4d3e946e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
 	} else {
 		/* If no mc addresses are required, flush the configuration */
 		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
-		if (rc)
+		if (rc < 0)
 			BNX2X_ERR("Failed to clear multicast configuration %d\n",
 				  rc);
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 11e8a866a312..a19f68f5862d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1311,10 +1311,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 		cp_cons = NEXT_CMP(cp_cons);
 	}
 
-	if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+	if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
 		bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
-		netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
-			    agg_bufs, (int)MAX_SKB_FRAGS);
+		if (agg_bufs > MAX_SKB_FRAGS)
+			netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+				    agg_bufs, (int)MAX_SKB_FRAGS);
 		return NULL;
 	}
 
@@ -1573,6 +1574,45 @@ next_rx_no_prod:
 	return rc;
 }
 
+/* In netpoll mode, if we are using a combined completion ring, we need to
+ * discard the rx packets and recycle the buffers.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+				 u32 *raw_cons, u8 *event)
+{
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	u32 tmp_raw_cons = *raw_cons;
+	struct rx_cmp_ext *rxcmp1;
+	struct rx_cmp *rxcmp;
+	u16 cp_cons;
+	u8 cmp_type;
+
+	cp_cons = RING_CMP(tmp_raw_cons);
+	rxcmp = (struct rx_cmp *)
+			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
+	cp_cons = RING_CMP(tmp_raw_cons);
+	rxcmp1 = (struct rx_cmp_ext *)
+			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+	if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+		return -EBUSY;
+
+	cmp_type = RX_CMP_TYPE(rxcmp);
+	if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+		rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+			cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
+	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+		struct rx_tpa_end_cmp_ext *tpa_end1;
+
+		tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+		tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+			cpu_to_le32(RX_TPA_END_CMP_ERRORS);
+	}
+	return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+}
+
 #define BNXT_GET_EVENT_PORT(data)	\
 	((data) &			\
 	 ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -1755,7 +1795,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 			if (unlikely(tx_pkts > bp->tx_wake_thresh))
 				rx_pkts = budget;
 		} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
-			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			if (likely(budget))
+				rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			else
+				rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+							   &event);
 			if (likely(rc >= 0))
 				rx_pkts += rc;
 			else if (rc == -EBUSY)	/* partial completion */
@@ -6730,12 +6774,11 @@ static void bnxt_poll_controller(struct net_device *dev)
 	struct bnxt *bp = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < bp->cp_nr_rings; i++) {
-		struct bnxt_irq *irq = &bp->irq_tbl[i];
+	/* Only process tx rings/combined rings in netpoll mode. */
+	for (i = 0; i < bp->tx_nr_rings; i++) {
+		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 
-		disable_irq(irq->vector);
-		irq->handler(irq->vector, bp->bnapi[i]);
-		enable_irq(irq->vector);
+		napi_schedule(&txr->bnapi->napi);
 	}
 }
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5984423499e6..f872a7db2ca8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext {
 
 	__le32 rx_tpa_end_cmp_errors_v2;
 	#define RX_TPA_END_CMP_V2				(0x1 << 0)
-	#define RX_TPA_END_CMP_ERRORS				(0x7fff << 1)
+	#define RX_TPA_END_CMP_ERRORS				(0x3 << 1)
 	#define RX_TPA_END_CMPL_ERRORS_SHIFT			 1
 
 	u32 rx_tpa_end_cmp_start_opaque;
 };
 
+#define TPA_END_ERRORS(rx_tpa_end_ext)					\
+	((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 &			\
+	 cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
 #define DB_IDX_MASK						0xffffff
 #define DB_IDX_VALID						(0x1 << 26)
 #define DB_IRQ_DIS						(0x1 << 27)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 608bea171956..427d65a1a126 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -29,7 +29,15 @@ config MACB
 	  support for the MACB/GEM chip.
 
 	  To compile this driver as a module, choose M here: the module
-	  will be called macb.
+	  will be macb.
+
+config MACB_USE_HWSTAMP
+	bool "Use IEEE 1588 hwstamp"
+	depends on MACB
+	default y
+	imply PTP_1588_CLOCK
+	---help---
+	  Enable IEEE 1588 Precision Time Protocol (PTP) support for MACB.
 
 config MACB_PCI
 	tristate "Cadence PCI MACB/GEM support"
diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile
index 4ba75594d5c5..1d66ddb68969 100644
--- a/drivers/net/ethernet/cadence/Makefile
+++ b/drivers/net/ethernet/cadence/Makefile
@@ -1,6 +1,11 @@
 #
 # Makefile for the Atmel network device drivers.
 #
+macb-y	:= macb_main.o
+
+ifeq ($(CONFIG_MACB_USE_HWSTAMP),y)
+macb-y	+= macb_ptp.o
+endif
 
 obj-$(CONFIG_MACB) += macb.o
 obj-$(CONFIG_MACB_PCI) += macb_pci.o
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2510661102ba..c93f3a2dc6c1 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -11,6 +11,12 @@
 #define _MACB_H
 
 #include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP)
+#define MACB_EXT_DESC
+#endif
 
 #define MACB_GREGS_NBR 16
 #define MACB_GREGS_VERSION 2
@@ -86,6 +92,10 @@
 #define GEM_SA3T		0x009C /* Specific3 Top */
 #define GEM_SA4B		0x00A0 /* Specific4 Bottom */
 #define GEM_SA4T		0x00A4 /* Specific4 Top */
+#define GEM_EFTSH		0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */
+#define GEM_EFRSH		0x00ec /* PTP Event Frame Received Seconds Register 47:32 */
+#define GEM_PEFTSH		0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */
+#define GEM_PEFRSH		0x00f4 /* PTP Peer Event Frame Received Seconds Register 47:32 */
 #define GEM_OTX			0x0100 /* Octets transmitted */
 #define GEM_OCTTXL		0x0100 /* Octets transmitted [31:0] */
 #define GEM_OCTTXH		0x0104 /* Octets transmitted [47:32] */
@@ -155,6 +165,9 @@
 #define GEM_DCFG6		0x0294 /* Design Config 6 */
 #define GEM_DCFG7		0x0298 /* Design Config 7 */
 
+#define GEM_TXBDCTRL	0x04cc /* TX Buffer Descriptor control register */
+#define GEM_RXBDCTRL	0x04d0 /* RX Buffer Descriptor control register */
+
 #define GEM_ISR(hw_q)		(0x0400 + ((hw_q) << 2))
 #define GEM_TBQP(hw_q)		(0x0440 + ((hw_q) << 2))
 #define GEM_TBQPH(hw_q)		(0x04C8)
@@ -191,6 +204,8 @@
 #define MACB_TZQ_OFFSET		12 /* Transmit zero quantum pause frame */
 #define MACB_TZQ_SIZE		1
 #define MACB_SRTSM_OFFSET	15
+#define MACB_OSSMODE_OFFSET 24 /* Enable One Step Synchro Mode */
+#define MACB_OSSMODE_SIZE	1
 
 /* Bitfields in NCFGR */
 #define MACB_SPD_OFFSET		0 /* Speed */
@@ -269,6 +284,10 @@
 #define GEM_RXBS_SIZE		8
 #define GEM_DDRP_OFFSET		24 /* disc_when_no_ahb */
 #define GEM_DDRP_SIZE		1
+#define GEM_RXEXT_OFFSET	28 /* RX extended Buffer Descriptor mode */
+#define GEM_RXEXT_SIZE		1
+#define GEM_TXEXT_OFFSET	29 /* TX extended Buffer Descriptor mode */
+#define GEM_TXEXT_SIZE		1
 #define GEM_ADDR64_OFFSET	30 /* Address bus width - 64b or 32b */
 #define GEM_ADDR64_SIZE		1
 
@@ -425,6 +444,11 @@
 #define GEM_TX_PKT_BUFF_OFFSET			21
 #define GEM_TX_PKT_BUFF_SIZE			1
 
+
+/* Bitfields in DCFG5. */
+#define GEM_TSU_OFFSET				8
+#define GEM_TSU_SIZE				1
+
 /* Bitfields in DCFG6. */
 #define GEM_PBUF_LSO_OFFSET			27
 #define GEM_PBUF_LSO_SIZE			1
@@ -439,6 +463,52 @@
 #define GEM_NSINCR_OFFSET			0
 #define GEM_NSINCR_SIZE				8
 
+/* Bitfields in TSH */
+#define GEM_TSH_OFFSET				0 /* TSU timer value (s). MSB [47:32] of seconds timer count */
+#define GEM_TSH_SIZE				16
+
+/* Bitfields in TSL */
+#define GEM_TSL_OFFSET				0 /* TSU timer value (s). LSB [31:0] of seconds timer count */
+#define GEM_TSL_SIZE				32
+
+/* Bitfields in TN */
+#define GEM_TN_OFFSET				0 /* TSU timer value (ns) */
+#define GEM_TN_SIZE					30
+
+/* Bitfields in TXBDCTRL */
+#define GEM_TXTSMODE_OFFSET			4 /* TX Descriptor Timestamp Insertion mode */
+#define GEM_TXTSMODE_SIZE			2
+
+/* Bitfields in RXBDCTRL */
+#define GEM_RXTSMODE_OFFSET			4 /* RX Descriptor Timestamp Insertion mode */
+#define GEM_RXTSMODE_SIZE			2
+
+/* Transmit DMA buffer descriptor Word 1 */
+#define GEM_DMA_TXVALID_OFFSET		23 /* timestamp has been captured in the Buffer Descriptor */
+#define GEM_DMA_TXVALID_SIZE		1
+
+/* Receive DMA buffer descriptor Word 0 */
+#define GEM_DMA_RXVALID_OFFSET		2 /* indicates a valid timestamp in the Buffer Descriptor */
+#define GEM_DMA_RXVALID_SIZE		1
+
+/* DMA buffer descriptor Word 2 (32 bit addressing) or Word 4 (64 bit addressing) */
+#define GEM_DMA_SECL_OFFSET			30 /* Timestamp seconds[1:0]  */
+#define GEM_DMA_SECL_SIZE			2
+#define GEM_DMA_NSEC_OFFSET			0 /* Timestamp nanosecs [29:0] */
+#define GEM_DMA_NSEC_SIZE			30
+
+/* DMA buffer descriptor Word 3 (32 bit addressing) or Word 5 (64 bit addressing) */
+
+/* New hardware supports 12 bit precision of timestamp in DMA buffer descriptor.
+ * Old hardware supports only 6 bit precision but it is enough for PTP.
+ * Less accuracy is used always instead of checking hardware version.
+ */
+#define GEM_DMA_SECH_OFFSET			0 /* Timestamp seconds[5:2] */
+#define GEM_DMA_SECH_SIZE			4
+#define GEM_DMA_SEC_WIDTH			(GEM_DMA_SECH_SIZE + GEM_DMA_SECL_SIZE)
+#define GEM_DMA_SEC_TOP				(1 << GEM_DMA_SEC_WIDTH)
+#define GEM_DMA_SEC_MASK			(GEM_DMA_SEC_TOP - 1)
+
 /* Bitfields in ADJ */
 #define GEM_ADDSUB_OFFSET			31
 #define GEM_ADDSUB_SIZE				1
@@ -514,6 +584,8 @@
 #define queue_readl(queue, reg)		(queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg)
 #define queue_writel(queue, reg, value)	(queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value))
 
+#define PTP_TS_BUFFER_SIZE		128 /* must be power of 2 */
+
 /* Conditional GEM/MACB macros.  These perform the operation to the correct
  * register dependent on whether the device is a GEM or a MACB.  For registers
  * and bitfields that are common across both devices, use macb_{read,write}l
@@ -546,16 +618,26 @@ struct macb_dma_desc {
 	u32	ctrl;
 };
 
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-enum macb_hw_dma_cap {
-	HW_DMA_CAP_32B,
-	HW_DMA_CAP_64B,
-};
+#ifdef MACB_EXT_DESC
+#define HW_DMA_CAP_32B		0
+#define HW_DMA_CAP_64B		(1 << 0)
+#define HW_DMA_CAP_PTP		(1 << 1)
+#define HW_DMA_CAP_64B_PTP	(HW_DMA_CAP_64B | HW_DMA_CAP_PTP)
 
 struct macb_dma_desc_64 {
 	u32 addrh;
 	u32 resvd;
 };
+
+struct macb_dma_desc_ptp {
+	u32	ts_1;
+	u32	ts_2;
+};
+
+struct gem_tx_ts {
+	struct sk_buff *skb;
+	struct macb_dma_desc_ptp desc_ptp;
+};
 #endif
 
 /* DMA descriptor bitfields */
@@ -871,6 +953,11 @@ struct macb_config {
 	int	jumbo_max_len;
 };
 
+struct tsu_incr {
+	u32 sub_ns;
+	u32 ns;
+};
+
 struct macb_queue {
 	struct macb		*bp;
 	int			irq;
@@ -887,6 +974,12 @@ struct macb_queue {
 	struct macb_tx_skb	*tx_skb;
 	dma_addr_t		tx_ring_dma;
 	struct work_struct	tx_error_task;
+
+#ifdef CONFIG_MACB_USE_HWSTAMP
+	struct work_struct	tx_ts_task;
+	unsigned int		tx_ts_head, tx_ts_tail;
+	struct gem_tx_ts	tx_timestamps[PTP_TS_BUFFER_SIZE];
+#endif
 };
 
 struct macb {
@@ -955,11 +1048,62 @@ struct macb {
 	u32			wol;
 
 	struct macb_ptp_info	*ptp_info;	/* macb-ptp interface */
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	enum macb_hw_dma_cap hw_dma_cap;
+#ifdef MACB_EXT_DESC
+	uint8_t hw_dma_cap;
 #endif
+	spinlock_t tsu_clk_lock; /* gem tsu clock locking */
+	unsigned int tsu_rate;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_clock_info;
+	struct tsu_incr tsu_incr;
+	struct hwtstamp_config tstamp_config;
 };
 
+#ifdef CONFIG_MACB_USE_HWSTAMP
+#define GEM_TSEC_SIZE  (GEM_TSH_SIZE + GEM_TSL_SIZE)
+#define TSU_SEC_MAX_VAL (((u64)1 << GEM_TSEC_SIZE) - 1)
+#define TSU_NSEC_MAX_VAL ((1 << GEM_TN_SIZE) - 1)
+
+enum macb_bd_control {
+	TSTAMP_DISABLED,
+	TSTAMP_FRAME_PTP_EVENT_ONLY,
+	TSTAMP_ALL_PTP_FRAMES,
+	TSTAMP_ALL_FRAMES,
+};
+
+void gem_ptp_init(struct net_device *ndev);
+void gem_ptp_remove(struct net_device *ndev);
+int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *des);
+void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
+static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+	if (queue->bp->tstamp_config.tx_type == TSTAMP_DISABLED)
+		return -ENOTSUPP;
+
+	return gem_ptp_txstamp(queue, skb, desc);
+}
+
+static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+	if (bp->tstamp_config.rx_filter == TSTAMP_DISABLED)
+		return;
+
+	gem_ptp_rxstamp(bp, skb, desc);
+}
+int gem_get_hwtst(struct net_device *dev, struct ifreq *rq);
+int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd);
+#else
+static inline void gem_ptp_init(struct net_device *ndev) { }
+static inline void gem_ptp_remove(struct net_device *ndev) { }
+
+static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+	return -1;
+}
+
+static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
+#endif
+
 static inline bool macb_is_gem(struct macb *bp)
 {
 	return !!(bp->caps & MACB_CAPS_MACB_IS_GEM);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb_main.c
index 3ae9d8071ded..41e5711544fc 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -79,33 +79,84 @@
 #define MACB_HALT_TIMEOUT	1230
 
 /* DMA buffer descriptor might be different size
- * depends on hardware configuration.
+ * depends on hardware configuration:
+ *
+ * 1. dma address width 32 bits:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *
+ * 2. dma address width 64 bits:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *    word 3: upper 32 bit address of Data Buffer
+ *    word 4: unused
+ *
+ * 3. dma address width 32 bits with hardware timestamping:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *    word 3: timestamp word 1
+ *    word 4: timestamp word 2
+ *
+ * 4. dma address width 64 bits with hardware timestamping:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *    word 3: upper 32 bit address of Data Buffer
+ *    word 4: unused
+ *    word 5: timestamp word 1
+ *    word 6: timestamp word 2
  */
 static unsigned int macb_dma_desc_get_size(struct macb *bp)
 {
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
-		return sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64);
+#ifdef MACB_EXT_DESC
+	unsigned int desc_size;
+
+	switch (bp->hw_dma_cap) {
+	case HW_DMA_CAP_64B:
+		desc_size = sizeof(struct macb_dma_desc)
+			+ sizeof(struct macb_dma_desc_64);
+		break;
+	case HW_DMA_CAP_PTP:
+		desc_size = sizeof(struct macb_dma_desc)
+			+ sizeof(struct macb_dma_desc_ptp);
+		break;
+	case HW_DMA_CAP_64B_PTP:
+		desc_size = sizeof(struct macb_dma_desc)
+			+ sizeof(struct macb_dma_desc_64)
+			+ sizeof(struct macb_dma_desc_ptp);
+		break;
+	default:
+		desc_size = sizeof(struct macb_dma_desc);
+	}
+	return desc_size;
 #endif
 	return sizeof(struct macb_dma_desc);
 }
 
-static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int idx)
+static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx)
 {
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	/* Dma buffer descriptor is 4 words length (instead of 2 words)
-	 * for 64b GEM.
-	 */
-	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
-		idx <<= 1;
+#ifdef MACB_EXT_DESC
+	switch (bp->hw_dma_cap) {
+	case HW_DMA_CAP_64B:
+	case HW_DMA_CAP_PTP:
+		desc_idx <<= 1;
+		break;
+	case HW_DMA_CAP_64B_PTP:
+		desc_idx *= 3;
+		break;
+	default:
+		break;
+	}
+	return desc_idx;
 #endif
-	return idx;
+	return desc_idx;
 }
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
 {
-	return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B)
+		return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+	return NULL;
 }
 #endif
 
@@ -621,7 +672,7 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 	struct macb_dma_desc_64 *desc_64;
 
-	if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
 		desc_64 = macb_64b_desc(bp, desc);
 		desc_64->addrh = upper_32_bits(addr);
 	}
@@ -635,7 +686,7 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 	struct macb_dma_desc_64 *desc_64;
 
-	if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
 		desc_64 = macb_64b_desc(bp, desc);
 		addr = ((u64)(desc_64->addrh) << 32);
 	}
@@ -734,7 +785,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	/* Reinitialize the TX desc queue */
 	queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B)
 		queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
 #endif
 	/* Make TX ring reflect state of hardware */
@@ -796,6 +847,12 @@ static void macb_tx_interrupt(struct macb_queue *queue)
 
 			/* First, update TX stats if needed */
 			if (skb) {
+				if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+					/* skb now belongs to timestamp buffer
+					 * and will be removed later
+					 */
+					tx_skb->skb = NULL;
+				}
 				netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
 					    macb_tx_ring_wrap(bp, tail),
 					    skb->data);
@@ -962,6 +1019,8 @@ static int gem_rx(struct macb *bp, int budget)
 		bp->dev->stats.rx_packets++;
 		bp->dev->stats.rx_bytes += skb->len;
 
+		gem_ptp_do_rxstamp(bp, skb, desc);
+
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 		netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
 			    skb->len, skb->csum);
@@ -1283,7 +1342,6 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
 				queue_writel(queue, ISR, MACB_BIT(HRESP));
 		}
-
 		status = queue_readl(queue, ISR);
 	}
 
@@ -1613,7 +1671,6 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Make newly initialized descriptor visible to hardware */
 	wmb();
-
 	skb_tx_timestamp(skb);
 
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
@@ -1942,9 +1999,13 @@ static void macb_configure_dma(struct macb *bp)
 			dmacfg &= ~GEM_BIT(TXCOEN);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
 			dmacfg |= GEM_BIT(ADDR64);
 #endif
+#ifdef CONFIG_MACB_USE_HWSTAMP
+		if (bp->hw_dma_cap & HW_DMA_CAP_PTP)
+			dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
+#endif
 		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
 		gem_writel(bp, DMACFG, dmacfg);
@@ -1992,13 +2053,13 @@ static void macb_init_hw(struct macb *bp)
 	/* Initialize TX and RX buffers */
 	macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B)
 		macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma));
 #endif
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
 			queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
 #endif
 
@@ -2467,6 +2528,70 @@ static int macb_set_ringparam(struct net_device *netdev,
 	return 0;
 }
 
+#ifdef CONFIG_MACB_USE_HWSTAMP
+static unsigned int gem_get_tsu_rate(struct macb *bp)
+{
+	struct clk *tsu_clk;
+	unsigned int tsu_rate;
+
+	tsu_clk = devm_clk_get(&bp->pdev->dev, "tsu_clk");
+	if (!IS_ERR(tsu_clk))
+		tsu_rate = clk_get_rate(tsu_clk);
+	/* try pclk instead */
+	else if (!IS_ERR(bp->pclk)) {
+		tsu_clk = bp->pclk;
+		tsu_rate = clk_get_rate(tsu_clk);
+	} else
+		return -ENOTSUPP;
+	return tsu_rate;
+}
+
+static s32 gem_get_ptp_max_adj(void)
+{
+	return 64000000;
+}
+
+static int gem_get_ts_info(struct net_device *dev,
+			   struct ethtool_ts_info *info)
+{
+	struct macb *bp = netdev_priv(dev);
+
+	if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) {
+		ethtool_op_get_ts_info(dev, info);
+		return 0;
+	}
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_SOFTWARE |
+		SOF_TIMESTAMPING_RX_SOFTWARE |
+		SOF_TIMESTAMPING_SOFTWARE |
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->tx_types =
+		(1 << HWTSTAMP_TX_ONESTEP_SYNC) |
+		(1 << HWTSTAMP_TX_OFF) |
+		(1 << HWTSTAMP_TX_ON);
+	info->rx_filters =
+		(1 << HWTSTAMP_FILTER_NONE) |
+		(1 << HWTSTAMP_FILTER_ALL);
+
+	info->phc_index = bp->ptp_clock ? ptp_clock_index(bp->ptp_clock) : -1;
+
+	return 0;
+}
+
+static struct macb_ptp_info gem_ptp_info = {
+	.ptp_init	 = gem_ptp_init,
+	.ptp_remove	 = gem_ptp_remove,
+	.get_ptp_max_adj = gem_get_ptp_max_adj,
+	.get_tsu_rate	 = gem_get_tsu_rate,
+	.get_ts_info	 = gem_get_ts_info,
+	.get_hwtst	 = gem_get_hwtst,
+	.set_hwtst	 = gem_set_hwtst,
+};
+#endif
+
 static int macb_get_ts_info(struct net_device *netdev,
 			    struct ethtool_ts_info *info)
 {
@@ -2600,6 +2725,16 @@ static void macb_configure_caps(struct macb *bp,
 		dcfg = gem_readl(bp, DCFG2);
 		if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0)
 			bp->caps |= MACB_CAPS_FIFO_MODE;
+#ifdef CONFIG_MACB_USE_HWSTAMP
+		if (gem_has_ptp(bp)) {
+			if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5)))
+				pr_err("GEM doesn't support hardware ptp.\n");
+			else {
+				bp->hw_dma_cap |= HW_DMA_CAP_PTP;
+				bp->ptp_info = &gem_ptp_info;
+			}
+		}
+#endif
 	}
 
 	dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps);
@@ -2737,7 +2872,7 @@ static int macb_init(struct platform_device *pdev)
 			queue->IMR  = GEM_IMR(hw_q - 1);
 			queue->TBQP = GEM_TBQP(hw_q - 1);
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+			if (bp->hw_dma_cap & HW_DMA_CAP_64B)
 				queue->TBQPH = GEM_TBQPH(hw_q - 1);
 #endif
 		} else {
@@ -2748,7 +2883,7 @@ static int macb_init(struct platform_device *pdev)
 			queue->IMR  = MACB_IMR;
 			queue->TBQP = MACB_TBQP;
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+			if (bp->hw_dma_cap & HW_DMA_CAP_64B)
 				queue->TBQPH = MACB_TBQPH;
 #endif
 		}
@@ -3205,7 +3340,9 @@ static const struct macb_config np4_config = {
 };
 
 static const struct macb_config zynqmp_config = {
-	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+			MACB_CAPS_JUMBO |
+			MACB_CAPS_GEM_HAS_PTP,
 	.dma_burst_length = 16,
 	.clk_init = macb_clk_init,
 	.init = macb_init,
@@ -3239,7 +3376,9 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids);
 #endif /* CONFIG_OF */
 
 static const struct macb_config default_gem_config = {
-	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+			MACB_CAPS_JUMBO |
+			MACB_CAPS_GEM_HAS_PTP,
 	.dma_burst_length = 16,
 	.clk_init = macb_clk_init,
 	.init = macb_init,
@@ -3328,19 +3467,17 @@ static int macb_probe(struct platform_device *pdev)
 		bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
 	device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
 
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
-		dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
-		bp->hw_dma_cap = HW_DMA_CAP_64B;
-	} else
-		bp->hw_dma_cap = HW_DMA_CAP_32B;
-#endif
-
 	spin_lock_init(&bp->lock);
 
 	/* setup capabilities */
 	macb_configure_caps(bp, macb_config);
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
+		dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+		bp->hw_dma_cap |= HW_DMA_CAP_64B;
+	}
+#endif
 	platform_set_drvdata(pdev, dev);
 
 	dev->irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
new file mode 100755
index 000000000000..67cca08472b7
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -0,0 +1,518 @@
+/**
+ * 1588 PTP support for Cadence GEM device.
+ *
+ * Copyright (C) 2017 Cadence Design Systems - http://www.cadence.com
+ *
+ * Authors: Rafal Ozieblo <[email protected]>
+ *          Bartosz Folta <[email protected]>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/time64.h>
+#include <linux/ptp_classify.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+#include <linux/circ_buf.h>
+#include <linux/spinlock.h>
+
+#include "macb.h"
+
+#define  GEM_PTP_TIMER_NAME "gem-ptp-timer"
+
+static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp,
+					       struct macb_dma_desc *desc)
+{
+	if (bp->hw_dma_cap == HW_DMA_CAP_PTP)
+		return (struct macb_dma_desc_ptp *)
+				((u8 *)desc + sizeof(struct macb_dma_desc));
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B_PTP)
+		return (struct macb_dma_desc_ptp *)
+				((u8 *)desc + sizeof(struct macb_dma_desc)
+				+ sizeof(struct macb_dma_desc_64));
+	return NULL;
+}
+
+static int gem_tsu_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+	unsigned long flags;
+	long first, second;
+	u32 secl, sech;
+
+	spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+	first = gem_readl(bp, TN);
+	secl = gem_readl(bp, TSL);
+	sech = gem_readl(bp, TSH);
+	second = gem_readl(bp, TN);
+
+	/* test for nsec rollover */
+	if (first > second) {
+		/* if so, use later read & re-read seconds
+		 * (assume all done within 1s)
+		 */
+		ts->tv_nsec = gem_readl(bp, TN);
+		secl = gem_readl(bp, TSL);
+		sech = gem_readl(bp, TSH);
+	} else {
+		ts->tv_nsec = first;
+	}
+
+	spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+	ts->tv_sec = (((u64)sech << GEM_TSL_SIZE) | secl)
+			& TSU_SEC_MAX_VAL;
+	return 0;
+}
+
+static int gem_tsu_set_time(struct ptp_clock_info *ptp,
+			    const struct timespec64 *ts)
+{
+	struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+	unsigned long flags;
+	u32 ns, sech, secl;
+
+	secl = (u32)ts->tv_sec;
+	sech = (ts->tv_sec >> GEM_TSL_SIZE) & ((1 << GEM_TSH_SIZE) - 1);
+	ns = ts->tv_nsec;
+
+	spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+
+	/* TSH doesn't latch the time and no atomicity! */
+	gem_writel(bp, TN, 0); /* clear to avoid overflow */
+	gem_writel(bp, TSH, sech);
+	/* write lower bits 2nd, for synchronized secs update */
+	gem_writel(bp, TSL, secl);
+	gem_writel(bp, TN, ns);
+
+	spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+
+	return 0;
+}
+
+static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec)
+{
+	unsigned long flags;
+
+	/* tsu_timer_incr register must be written after
+	 * the tsu_timer_incr_sub_ns register and the write operation
+	 * will cause the value written to the tsu_timer_incr_sub_ns register
+	 * to take effect.
+	 */
+	spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+	gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns));
+	gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns));
+	spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+
+	return 0;
+}
+
+static int gem_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+	struct tsu_incr incr_spec;
+	bool neg_adj = false;
+	u32 word;
+	u64 adj;
+
+	if (scaled_ppm < 0) {
+		neg_adj = true;
+		scaled_ppm = -scaled_ppm;
+	}
+
+	/* Adjustment is relative to base frequency */
+	incr_spec.sub_ns = bp->tsu_incr.sub_ns;
+	incr_spec.ns = bp->tsu_incr.ns;
+
+	/* scaling: unused(8bit) | ns(8bit) | fractions(16bit) */
+	word = ((u64)incr_spec.ns << GEM_SUBNSINCR_SIZE) + incr_spec.sub_ns;
+	adj = (u64)scaled_ppm * word;
+	/* Divide with rounding, equivalent to floating dividing:
+	 * (temp / USEC_PER_SEC) + 0.5
+	 */
+	adj += (USEC_PER_SEC >> 1);
+	adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */
+	adj = div_u64(adj, USEC_PER_SEC);
+	adj = neg_adj ? (word - adj) : (word + adj);
+
+	incr_spec.ns = (adj >> GEM_SUBNSINCR_SIZE)
+			& ((1 << GEM_NSINCR_SIZE) - 1);
+	incr_spec.sub_ns = adj & ((1 << GEM_SUBNSINCR_SIZE) - 1);
+	gem_tsu_incr_set(bp, &incr_spec);
+	return 0;
+}
+
+static int gem_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+	struct timespec64 now, then = ns_to_timespec64(delta);
+	u32 adj, sign = 0;
+
+	if (delta < 0) {
+		sign = 1;
+		delta = -delta;
+	}
+
+	if (delta > TSU_NSEC_MAX_VAL) {
+		gem_tsu_get_time(&bp->ptp_clock_info, &now);
+		if (sign)
+			now = timespec64_sub(now, then);
+		else
+			now = timespec64_add(now, then);
+
+		gem_tsu_set_time(&bp->ptp_clock_info,
+				 (const struct timespec64 *)&now);
+	} else {
+		adj = (sign << GEM_ADDSUB_OFFSET) | delta;
+
+		gem_writel(bp, TA, adj);
+	}
+
+	return 0;
+}
+
+static int gem_ptp_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static struct ptp_clock_info gem_ptp_caps_template = {
+	.owner		= THIS_MODULE,
+	.name		= GEM_PTP_TIMER_NAME,
+	.max_adj	= 0,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 1,
+	.adjfine	= gem_ptp_adjfine,
+	.adjtime	= gem_ptp_adjtime,
+	.gettime64	= gem_tsu_get_time,
+	.settime64	= gem_tsu_set_time,
+	.enable		= gem_ptp_enable,
+};
+
+static void gem_ptp_init_timer(struct macb *bp)
+{
+	u32 rem = 0;
+	u64 adj;
+
+	bp->tsu_incr.ns = div_u64_rem(NSEC_PER_SEC, bp->tsu_rate, &rem);
+	if (rem) {
+		adj = rem;
+		adj <<= GEM_SUBNSINCR_SIZE;
+		bp->tsu_incr.sub_ns = div_u64(adj, bp->tsu_rate);
+	} else {
+		bp->tsu_incr.sub_ns = 0;
+	}
+}
+
+static void gem_ptp_init_tsu(struct macb *bp)
+{
+	struct timespec64 ts;
+
+	/* 1. get current system time */
+	ts = ns_to_timespec64(ktime_to_ns(ktime_get_real()));
+
+	/* 2. set ptp timer */
+	gem_tsu_set_time(&bp->ptp_clock_info, &ts);
+
+	/* 3. set PTP timer increment value to BASE_INCREMENT */
+	gem_tsu_incr_set(bp, &bp->tsu_incr);
+
+	gem_writel(bp, TA, 0);
+}
+
+static void gem_ptp_clear_timer(struct macb *bp)
+{
+	bp->tsu_incr.sub_ns = 0;
+	bp->tsu_incr.ns = 0;
+
+	gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, 0));
+	gem_writel(bp, TI, GEM_BF(NSINCR, 0));
+	gem_writel(bp, TA, 0);
+}
+
+static int gem_hw_timestamp(struct macb *bp, u32 dma_desc_ts_1,
+			    u32 dma_desc_ts_2, struct timespec64 *ts)
+{
+	struct timespec64 tsu;
+
+	ts->tv_sec = (GEM_BFEXT(DMA_SECH, dma_desc_ts_2) << GEM_DMA_SECL_SIZE) |
+			GEM_BFEXT(DMA_SECL, dma_desc_ts_1);
+	ts->tv_nsec = GEM_BFEXT(DMA_NSEC, dma_desc_ts_1);
+
+	/* TSU overlapping workaround
+	 * The timestamp only contains lower few bits of seconds,
+	 * so add value from 1588 timer
+	 */
+	gem_tsu_get_time(&bp->ptp_clock_info, &tsu);
+
+	/* If the top bit is set in the timestamp,
+	 * but not in 1588 timer, it has rolled over,
+	 * so subtract max size
+	 */
+	if ((ts->tv_sec & (GEM_DMA_SEC_TOP >> 1)) &&
+	    !(tsu.tv_sec & (GEM_DMA_SEC_TOP >> 1)))
+		ts->tv_sec -= GEM_DMA_SEC_TOP;
+
+	ts->tv_sec += ((~GEM_DMA_SEC_MASK) & tsu.tv_sec);
+
+	return 0;
+}
+
+void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb,
+		     struct macb_dma_desc *desc)
+{
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+	struct macb_dma_desc_ptp *desc_ptp;
+	struct timespec64 ts;
+
+	if (GEM_BFEXT(DMA_RXVALID, desc->addr)) {
+		desc_ptp = macb_ptp_desc(bp, desc);
+		gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts);
+		memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+		shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+	}
+}
+
+static void gem_tstamp_tx(struct macb *bp, struct sk_buff *skb,
+			  struct macb_dma_desc_ptp *desc_ptp)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	struct timespec64 ts;
+
+	gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts);
+	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+	shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+	skb_tstamp_tx(skb, &shhwtstamps);
+}
+
+int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb,
+		    struct macb_dma_desc *desc)
+{
+	unsigned long tail = READ_ONCE(queue->tx_ts_tail);
+	unsigned long head = queue->tx_ts_head;
+	struct macb_dma_desc_ptp *desc_ptp;
+	struct gem_tx_ts *tx_timestamp;
+
+	if (!GEM_BFEXT(DMA_TXVALID, desc->ctrl))
+		return -EINVAL;
+
+	if (CIRC_SPACE(head, tail, PTP_TS_BUFFER_SIZE) == 0)
+		return -ENOMEM;
+
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+	desc_ptp = macb_ptp_desc(queue->bp, desc);
+	tx_timestamp = &queue->tx_timestamps[head];
+	tx_timestamp->skb = skb;
+	tx_timestamp->desc_ptp.ts_1 = desc_ptp->ts_1;
+	tx_timestamp->desc_ptp.ts_2 = desc_ptp->ts_2;
+	/* move head */
+	smp_store_release(&queue->tx_ts_head,
+			  (head + 1) & (PTP_TS_BUFFER_SIZE - 1));
+
+	schedule_work(&queue->tx_ts_task);
+	return 0;
+}
+
+static void gem_tx_timestamp_flush(struct work_struct *work)
+{
+	struct macb_queue *queue =
+			container_of(work, struct macb_queue, tx_ts_task);
+	unsigned long head, tail;
+	struct gem_tx_ts *tx_ts;
+
+	/* take current head */
+	head = smp_load_acquire(&queue->tx_ts_head);
+	tail = queue->tx_ts_tail;
+
+	while (CIRC_CNT(head, tail, PTP_TS_BUFFER_SIZE)) {
+		tx_ts = &queue->tx_timestamps[tail];
+		gem_tstamp_tx(queue->bp, tx_ts->skb, &tx_ts->desc_ptp);
+		/* cleanup */
+		dev_kfree_skb_any(tx_ts->skb);
+		/* remove old tail */
+		smp_store_release(&queue->tx_ts_tail,
+				  (tail + 1) & (PTP_TS_BUFFER_SIZE - 1));
+		tail = queue->tx_ts_tail;
+	}
+}
+
+void gem_ptp_init(struct net_device *dev)
+{
+	struct macb *bp = netdev_priv(dev);
+	struct macb_queue *queue;
+	unsigned int q;
+
+	bp->ptp_clock_info = gem_ptp_caps_template;
+
+	/* nominal frequency and maximum adjustment in ppb */
+	bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp);
+	bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj();
+	gem_ptp_init_timer(bp);
+	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev);
+	if (IS_ERR(bp->ptp_clock)) {
+		pr_err("ptp clock register failed: %ld\n",
+			PTR_ERR(bp->ptp_clock));
+		bp->ptp_clock = NULL;
+		return;
+	} else if (bp->ptp_clock == NULL) {
+		pr_err("ptp clock register failed\n");
+		return;
+	}
+
+	spin_lock_init(&bp->tsu_clk_lock);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue->tx_ts_head = 0;
+		queue->tx_ts_tail = 0;
+		INIT_WORK(&queue->tx_ts_task, gem_tx_timestamp_flush);
+	}
+
+	gem_ptp_init_tsu(bp);
+
+	dev_info(&bp->pdev->dev, "%s ptp clock registered.\n",
+		 GEM_PTP_TIMER_NAME);
+}
+
+void gem_ptp_remove(struct net_device *ndev)
+{
+	struct macb *bp = netdev_priv(ndev);
+
+	if (bp->ptp_clock)
+		ptp_clock_unregister(bp->ptp_clock);
+
+	gem_ptp_clear_timer(bp);
+
+	dev_info(&bp->pdev->dev, "%s ptp clock unregistered.\n",
+		 GEM_PTP_TIMER_NAME);
+}
+
+static int gem_ptp_set_ts_mode(struct macb *bp,
+			       enum macb_bd_control tx_bd_control,
+			       enum macb_bd_control rx_bd_control)
+{
+	gem_writel(bp, TXBDCTRL, GEM_BF(TXTSMODE, tx_bd_control));
+	gem_writel(bp, RXBDCTRL, GEM_BF(RXTSMODE, rx_bd_control));
+
+	return 0;
+}
+
+int gem_get_hwtst(struct net_device *dev, struct ifreq *rq)
+{
+	struct hwtstamp_config *tstamp_config;
+	struct macb *bp = netdev_priv(dev);
+
+	tstamp_config = &bp->tstamp_config;
+	if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0)
+		return -EOPNOTSUPP;
+
+	if (copy_to_user(rq->ifr_data, tstamp_config, sizeof(*tstamp_config)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
+static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
+{
+	u32 reg_val;
+
+	reg_val = macb_readl(bp, NCR);
+
+	if (enable)
+		macb_writel(bp, NCR, reg_val | MACB_BIT(OSSMODE));
+	else
+		macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE));
+
+	return 0;
+}
+
+int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
+	enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
+	struct hwtstamp_config *tstamp_config;
+	struct macb *bp = netdev_priv(dev);
+	u32 regval;
+
+	tstamp_config = &bp->tstamp_config;
+	if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(tstamp_config, ifr->ifr_data,
+			   sizeof(*tstamp_config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (tstamp_config->flags)
+		return -EINVAL;
+
+	switch (tstamp_config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		break;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		if (gem_ptp_set_one_step_sync(bp, 1) != 0)
+			return -ERANGE;
+	case HWTSTAMP_TX_ON:
+		tx_bd_control = TSTAMP_ALL_FRAMES;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (tstamp_config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		rx_bd_control =  TSTAMP_ALL_PTP_FRAMES;
+		tstamp_config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		regval = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, (regval | MACB_BIT(SRTSM)));
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_ALL:
+		rx_bd_control = TSTAMP_ALL_FRAMES;
+		tstamp_config->rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		tstamp_config->rx_filter = HWTSTAMP_FILTER_NONE;
+		return -ERANGE;
+	}
+
+	if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0)
+		return -ERANGE;
+
+	if (copy_to_user(ifr->ifr_data, tstamp_config, sizeof(*tstamp_config)))
+		return -EFAULT;
+	else
+		return 0;
+}
+
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 573755b0a51b..49b80da51ba7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -227,15 +227,14 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
 		nic->speed = mbx.link_status.speed;
 		nic->mac_type = mbx.link_status.mac_type;
 		if (nic->link_up) {
-			netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
-				    nic->netdev->name, nic->speed,
+			netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
+				    nic->speed,
 				    nic->duplex == DUPLEX_FULL ?
-				"Full duplex" : "Half duplex");
+				    "Full" : "Half");
 			netif_carrier_on(nic->netdev);
 			netif_tx_start_all_queues(nic->netdev);
 		} else {
-			netdev_info(nic->netdev, "%s: Link is Down\n",
-				    nic->netdev->name);
+			netdev_info(nic->netdev, "Link is Down\n");
 			netif_carrier_off(nic->netdev);
 			netif_tx_stop_all_queues(nic->netdev);
 		}
@@ -721,8 +720,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 		return;
 
 	if (netif_msg_pktdata(nic)) {
-		netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name,
-			    skb, skb->len);
+		netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
 			       skb->data, skb->len, true);
 	}
@@ -854,10 +852,8 @@ done:
 			netif_tx_wake_queue(txq);
 			nic = nic->pnicvf;
 			this_cpu_inc(nic->drv_stats->txq_wake);
-			if (netif_msg_tx_err(nic))
-				netdev_warn(netdev,
-					    "%s: Transmit queue wakeup SQ%d\n",
-					    netdev->name, txq_idx);
+			netif_warn(nic, tx_err, netdev,
+				   "Transmit queue wakeup SQ%d\n", txq_idx);
 		}
 	}
 
@@ -928,9 +924,8 @@ static void nicvf_handle_qs_err(unsigned long data)
 
 static void nicvf_dump_intr_status(struct nicvf *nic)
 {
-	if (netif_msg_intr(nic))
-		netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
-			    nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
+	netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
+		   nicvf_reg_read(nic, NIC_VF_INT));
 }
 
 static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
@@ -1212,10 +1207,8 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
 			netif_tx_wake_queue(txq);
 		} else {
 			this_cpu_inc(nic->drv_stats->txq_stop);
-			if (netif_msg_tx_err(nic))
-				netdev_warn(netdev,
-					    "%s: Transmit ring full, stopping SQ%d\n",
-					    netdev->name, qid);
+			netif_warn(nic, tx_err, netdev,
+				   "Transmit ring full, stopping SQ%d\n", qid);
 		}
 		return NETDEV_TX_BUSY;
 	}
@@ -1600,9 +1593,7 @@ static void nicvf_tx_timeout(struct net_device *dev)
 {
 	struct nicvf *nic = netdev_priv(dev);
 
-	if (netif_msg_tx_err(nic))
-		netdev_warn(dev, "%s: Transmit timed out, resetting\n",
-			    dev->name);
+	netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");
 
 	this_cpu_inc(nic->drv_stats->tx_timeout);
 	schedule_work(&nic->reset_task);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 2b181762ad49..d4496e9afcdf 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1811,11 +1811,9 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
 /* Check for errors in the receive cmp.queue entry */
 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
-	if (netif_msg_rx_err(nic))
-		netdev_err(nic->netdev,
-			   "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
-			   nic->netdev->name,
-			   cqe_rx->err_level, cqe_rx->err_opcode);
+	netif_err(nic, rx_err, nic->netdev,
+		  "RX error CQE err_level 0x%x err_opcode 0x%x\n",
+		  cqe_rx->err_level, cqe_rx->err_opcode);
 
 	switch (cqe_rx->err_opcode) {
 	case CQ_RX_ERROP_RE_PARTIAL:
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index dc0850b3b517..8870a9a798ca 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,6 +2,7 @@ config FSL_FMAN
 	tristate "FMan support"
 	depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
 	select GENERIC_ALLOCATOR
+	depends on HAS_DMA
 	select PHYLIB
 	default n
 	help
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index a79e257bc338..c4b4b0a1bbf0 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1718,7 +1718,7 @@ static int gfar_restore(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops gfar_pm_ops = {
+static const struct dev_pm_ops gfar_pm_ops = {
 	.suspend = gfar_suspend,
 	.resume = gfar_resume,
 	.freeze = gfar_suspend,
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 04211ac73b36..7ba653af19cb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -360,6 +360,7 @@ enum hnae_loop {
 	MAC_INTERNALLOOP_MAC = 0,
 	MAC_INTERNALLOOP_SERDES,
 	MAC_INTERNALLOOP_PHY,
+	MAC_LOOP_PHY_NONE,
 	MAC_LOOP_NONE,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 00e57bbaf122..a8db27e86a11 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -259,67 +259,27 @@ static const char hns_nic_test_strs[][ETH_GSTRING_LEN] = {
 
 static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 {
-#define COPPER_CONTROL_REG 0
-#define PHY_POWER_DOWN BIT(11)
-#define PHY_LOOP_BACK BIT(14)
-	u16 val = 0;
-
-	if (phy_dev->is_c45) /* c45 branch adding for XGE PHY */
-		return -ENOTSUPP;
+	int err;
 
 	if (en) {
-		/* speed : 1000M */
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 2);
-		phy_write(phy_dev, 21, 0x1046);
-
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-		/* Force Master */
-		phy_write(phy_dev, 9, 0x1F00);
-
-		/* Soft-reset */
-		phy_write(phy_dev, 0, 0x9140);
-		/* If autoneg disabled,two soft-reset operations */
-		phy_write(phy_dev, 0, 0x9140);
-
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-
-		/* Default is 0x0400 */
-		phy_write(phy_dev, 1, 0x418);
-
-		/* Force 1000M Link, Default is 0x0200 */
-		phy_write(phy_dev, 7, 0x20C);
-
-		/* Powerup Fiber */
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
-		val = phy_read(phy_dev, COPPER_CONTROL_REG);
-		val &= ~PHY_POWER_DOWN;
-		phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
-		/* Enable Phy Loopback */
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-		val = phy_read(phy_dev, COPPER_CONTROL_REG);
-		val |= PHY_LOOP_BACK;
-		val &= ~PHY_POWER_DOWN;
-		phy_write(phy_dev, COPPER_CONTROL_REG, val);
+		/* Doing phy loopback in offline state, phy resuming is
+		 * needed to power up the device.
+		 */
+		err = phy_resume(phy_dev);
+		if (err)
+			goto out;
+
+		err = phy_loopback(phy_dev, true);
 	} else {
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-		phy_write(phy_dev, 1, 0x400);
-		phy_write(phy_dev, 7, 0x200);
-
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
-		val = phy_read(phy_dev, COPPER_CONTROL_REG);
-		val |= PHY_POWER_DOWN;
-		phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-		phy_write(phy_dev, 9, 0xF00);
-
-		val = phy_read(phy_dev, COPPER_CONTROL_REG);
-		val &= ~PHY_LOOP_BACK;
-		val |= PHY_POWER_DOWN;
-		phy_write(phy_dev, COPPER_CONTROL_REG, val);
+		err = phy_loopback(phy_dev, false);
+		if (err)
+			goto out;
+
+		err = phy_suspend(phy_dev);
 	}
-	return 0;
+
+out:
+	return err;
 }
 
 static int __lb_setup(struct net_device *ndev,
@@ -332,10 +292,9 @@ static int __lb_setup(struct net_device *ndev,
 
 	switch (loop) {
 	case MAC_INTERNALLOOP_PHY:
-		if ((phy_dev) && (!phy_dev->is_c45)) {
-			ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
-			ret |= h->dev->ops->set_loopback(h, loop, 0x1);
-		}
+		ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+		if (!ret)
+			ret = h->dev->ops->set_loopback(h, loop, 0x1);
 		break;
 	case MAC_INTERNALLOOP_MAC:
 		if ((h->dev->ops->set_loopback) &&
@@ -346,17 +305,17 @@ static int __lb_setup(struct net_device *ndev,
 		if (h->dev->ops->set_loopback)
 			ret = h->dev->ops->set_loopback(h, loop, 0x1);
 		break;
+	case MAC_LOOP_PHY_NONE:
+		ret = hns_nic_config_phy_loopback(phy_dev, 0x0);
 	case MAC_LOOP_NONE:
-		if ((phy_dev) && (!phy_dev->is_c45))
-			ret |= hns_nic_config_phy_loopback(phy_dev, 0x0);
-
-		if (h->dev->ops->set_loopback) {
+		if (!ret && h->dev->ops->set_loopback) {
 			if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII)
-				ret |= h->dev->ops->set_loopback(h,
+				ret = h->dev->ops->set_loopback(h,
 					MAC_INTERNALLOOP_MAC, 0x0);
 
-			ret |= h->dev->ops->set_loopback(h,
-				MAC_INTERNALLOOP_SERDES, 0x0);
+			if (!ret)
+				ret = h->dev->ops->set_loopback(h,
+					MAC_INTERNALLOOP_SERDES, 0x0);
 		}
 		break;
 	default:
@@ -582,13 +541,16 @@ static int __lb_run_test(struct net_device *ndev,
 	return ret_val;
 }
 
-static int __lb_down(struct net_device *ndev)
+static int __lb_down(struct net_device *ndev, enum hnae_loop loop)
 {
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct hnae_handle *h = priv->ae_handle;
 	int ret;
 
-	ret = __lb_setup(ndev, MAC_LOOP_NONE);
+	if (loop == MAC_INTERNALLOOP_PHY)
+		ret = __lb_setup(ndev, MAC_LOOP_PHY_NONE);
+	else
+		ret = __lb_setup(ndev, MAC_LOOP_NONE);
 	if (ret)
 		netdev_err(ndev, "%s: __lb_setup return error(%d)!\n",
 			   __func__,
@@ -644,7 +606,8 @@ static void hns_nic_self_test(struct net_device *ndev,
 			if (!data[test_index]) {
 				data[test_index] = __lb_run_test(
 					ndev, (enum hnae_loop)st_param[i][0]);
-				(void)__lb_down(ndev);
+				(void)__lb_down(ndev,
+						(enum hnae_loop)st_param[i][0]);
 			}
 
 			if (data[test_index])
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 9a74c4e2e193..3e0a695537e2 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1914,7 +1914,7 @@ static struct vio_device_id ibmveth_device_table[] = {
 };
 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
 
-static struct dev_pm_ops ibmveth_pm_ops = {
+static const struct dev_pm_ops ibmveth_pm_ops = {
 	.resume = ibmveth_resume
 };
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 87db1eb5cc44..a3e694679635 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -763,12 +763,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 	if (rc)
 		return rc;
 
-	rc = init_sub_crq_irqs(adapter);
-	if (rc) {
-		netdev_err(netdev, "failed to initialize sub crq irqs\n");
-		return -1;
-	}
-
 	rc = init_stats_token(adapter);
 	if (rc)
 		return rc;
@@ -1803,7 +1797,6 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
 			return rc;
 	}
 
-	rc = init_sub_crq_irqs(adapter);
 	return rc;
 }
 
@@ -3669,6 +3662,13 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 	if (rc) {
 		dev_err(dev, "Initialization of sub crqs failed\n");
 		release_crq_queue(adapter);
+		return rc;
+	}
+
+	rc = init_sub_crq_irqs(adapter);
+	if (rc) {
+		dev_err(dev, "Failed to initialize sub crq irqs\n");
+		release_crq_queue(adapter);
 	}
 
 	return rc;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c1af47e45d3f..674773b28b2e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
 
 	if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
 		mlx4_dbg(dev,
-			 "updating vf %d port %d no link state HW enforcment\n",
+			 "updating vf %d port %d no link state HW enforcement\n",
 			 vf, port);
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 1dae8e40fb25..5f41dc92aa68 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -238,7 +238,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
 		priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
 	}
 
-	if (mlx4_en_setup_tc(dev, num_tcs))
+	if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs))
 		return 1;
 
 	return 0;
@@ -303,7 +303,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets)
 	int has_ets_tc = 0;
 
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		if (ets->prio_tc[i] >= MLX4_EN_NUM_UP) {
+		if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) {
 			en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n",
 					i, ets->prio_tc[i]);
 			return -EINVAL;
@@ -472,7 +472,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 			goto err;
 		if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
 			goto err;
-		if (mlx4_en_setup_tc(dev, 0))
+		if (mlx4_en_alloc_tx_queue_per_tc(dev, 0))
 			goto err;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index e97fbf327594..c751a1d434ad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1750,7 +1750,8 @@ static void mlx4_en_get_channels(struct net_device *dev,
 	channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
 
 	channel->rx_count = priv->rx_ring_num;
-	channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
+	channel->tx_count = priv->tx_ring_num[TX] /
+			    priv->prof->num_up;
 }
 
 static int mlx4_en_set_channels(struct net_device *dev,
@@ -1763,6 +1764,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	int port_up = 0;
 	int xdp_count;
 	int err = 0;
+	u8 up;
 
 	if (!channel->tx_count || !channel->rx_count)
 		return -EINVAL;
@@ -1773,18 +1775,19 @@ static int mlx4_en_set_channels(struct net_device *dev,
 
 	mutex_lock(&mdev->state_lock);
 	xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
-	if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
+	if (channel->tx_count * priv->prof->num_up + xdp_count >
+	    MAX_TX_RINGS) {
 		err = -EINVAL;
 		en_err(priv,
 		       "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
-		       channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
+		       channel->tx_count * priv->prof->num_up  + xdp_count,
 		       MAX_TX_RINGS);
 		goto out;
 	}
 
 	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
 	new_prof.num_tx_rings_p_up = channel->tx_count;
-	new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
+	new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up;
 	new_prof.tx_ring_num[TX_XDP] = xdp_count;
 	new_prof.rx_ring_num = channel->rx_count;
 
@@ -1799,11 +1802,11 @@ static int mlx4_en_set_channels(struct net_device *dev,
 
 	mlx4_en_safe_replace_resources(priv, tmp);
 
-	netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
-	if (netdev_get_num_tc(dev))
-		mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
+	up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ?
+				    0 : priv->prof->num_up;
+	mlx4_en_setup_tc(dev, up);
 
 	en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
 	en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 56cdf38d150e..2b0cbca4beb5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -169,8 +169,10 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 		params->prof[i].tx_ppp = pfctx;
 		params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
 		params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+		params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
+		params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up;
 		params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
-			MLX4_EN_NUM_UP;
+			params->prof[i].num_up;
 		params->prof[i].rss_rings = 0;
 		params->prof[i].inline_thold = inline_thold;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9da76e3be2fc..3a291fc1780a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -60,11 +60,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 	int i;
 	unsigned int offset = 0;
 
-	if (up && up != MLX4_EN_NUM_UP)
+	if (up && up != MLX4_EN_NUM_UP_HIGH)
 		return -EINVAL;
 
 	netdev_set_num_tc(dev, up);
-
+	netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 	/* Partition Tx queues evenly amongst UP's */
 	for (i = 0; i < up; i++) {
 		netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset);
@@ -86,6 +86,50 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 	return 0;
 }
 
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
+	int port_up = 0;
+	int err = 0;
+
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	mutex_lock(&mdev->state_lock);
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	new_prof.num_up = (tc == 0) ? MLX4_EN_NUM_UP_LOW :
+				      MLX4_EN_NUM_UP_HIGH;
+	new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up *
+				   new_prof.num_up;
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+	if (err)
+		goto out;
+
+	if (priv->port_up) {
+		port_up = 1;
+		mlx4_en_stop_port(dev, 1);
+	}
+
+	mlx4_en_safe_replace_resources(priv, tmp);
+	if (port_up) {
+		err = mlx4_en_start_port(dev);
+		if (err) {
+			en_err(priv, "Failed starting port for setup TC\n");
+			goto out;
+		}
+	}
+
+	err = mlx4_en_setup_tc(dev, tc);
+out:
+	mutex_unlock(&mdev->state_lock);
+	kfree(tmp);
+	return err;
+}
+
 static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
 			      u32 chain_index, __be16 proto,
 			      struct tc_to_netdev *tc)
@@ -93,9 +137,12 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
 	if (tc->type != TC_SETUP_MQPRIO)
 		return -EINVAL;
 
+	if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
+		return -EINVAL;
+
 	tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-	return mlx4_en_setup_tc(dev, tc->mqprio->num_tc);
+	return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc);
 }
 
 #ifdef CONFIG_RFS_ACCEL
@@ -2144,7 +2191,7 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
 
 	memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
 	       sizeof(dst->hwtstamp_config));
-	dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
+	dst->num_tx_rings_p_up = prof->num_tx_rings_p_up;
 	dst->rx_ring_num = prof->rx_ring_num;
 	dst->flags = prof->flags;
 	dst->mdev = src->mdev;
@@ -2197,6 +2244,7 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
 		dst->tx_ring[t] = src->tx_ring[t];
 		dst->tx_cq[t] = src->tx_cq[t];
 	}
+	dst->num_tx_rings_p_up = src->num_tx_rings_p_up;
 	dst->rx_ring_num = src->rx_ring_num;
 	memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
 }
@@ -2780,7 +2828,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 	if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) {
 		tx_changed = 1;
 		new_prof.tx_ring_num[TX] =
-			MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP);
+			MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up);
 		en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
 	}
 
@@ -3271,7 +3319,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		priv->flags |= MLX4_EN_DCB_ENABLED;
 		priv->cee_config.pfc_state = false;
 
-		for (i = 0; i < MLX4_EN_NUM_UP; i++)
+		for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++)
 			priv->cee_config.dcb_pfc[i] = pfc_disabled;
 
 		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index a6b0db0e0383..86d2d42d658d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -63,7 +63,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 	context->local_qpn = cpu_to_be32(qpn);
 	context->pri_path.ackto = 1 & 0x07;
 	context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
-	if (user_prio >= 0) {
+	/* force user priority per tx ring */
+	if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) {
 		context->pri_path.sched_queue |= user_prio << 3;
 		context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 7d69d939ee2d..4f3a9b27ce4a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -691,15 +691,11 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
-	u8 up = 0;
 
 	if (netdev_get_num_tc(dev))
 		return skb_tx_hash(dev, skb);
 
-	if (skb_vlan_tag_present(skb))
-		up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
-
-	return fallback(dev, skb) % rings_p_up + up * rings_p_up;
+	return fallback(dev, skb) % rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, const void *src,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 457e070bca46..a27c9c13a36e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -91,7 +91,7 @@ module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
 			   "probe_vf=port1,port2,port1+2");
 
-int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
 module_param_named(log_num_mgm_entry_size,
 			mlx4_log_num_mgm_entry_size, int, 0444);
 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 6ea2b7a0c34d..30616cd0140d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -230,7 +230,6 @@ do {									\
 #define mlx4_warn(mdev, format, ...)					\
 	dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
 
-extern int mlx4_log_num_mgm_entry_size;
 extern int log_mtts_per_seg;
 extern int mlx4_internal_err_reset;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 963b77d51b48..d350b2158104 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -115,11 +115,12 @@
 #define MLX4_EN_SMALL_PKT_SIZE		64
 #define MLX4_EN_MIN_TX_RING_P_UP	1
 #define MLX4_EN_MAX_TX_RING_P_UP	32
-#define MLX4_EN_NUM_UP			8
+#define MLX4_EN_NUM_UP_LOW		1
+#define MLX4_EN_NUM_UP_HIGH		8
 #define MLX4_EN_DEF_RX_RING_SIZE  	1024
 #define MLX4_EN_DEF_TX_RING_SIZE	MLX4_EN_DEF_RX_RING_SIZE
 #define MAX_TX_RINGS			(MLX4_EN_MAX_TX_RING_P_UP * \
-					 MLX4_EN_NUM_UP)
+					 MLX4_EN_NUM_UP_HIGH)
 
 #define MLX4_EN_DEFAULT_TX_WORK		256
 
@@ -386,6 +387,7 @@ struct mlx4_en_port_profile {
 	u8 rx_ppp;
 	u8 tx_pause;
 	u8 tx_ppp;
+	u8 num_up;
 	int rss_rings;
 	int inline_thold;
 	struct hwtstamp_config hwtstamp_config;
@@ -485,7 +487,7 @@ enum dcb_pfc_type {
 
 struct mlx4_en_cee_config {
 	bool	pfc_state;
-	enum	dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
+	enum	dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH];
 };
 #endif
 
@@ -761,6 +763,7 @@ extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops;
 #endif
 
 int mlx4_en_setup_tc(struct net_device *dev, u8 up);
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc);
 
 #ifdef CONFIG_RFS_ACCEL
 void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index cf1ef48bfd8d..5aee05992f27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -11,9 +11,13 @@ config MLX5_CORE
 	  Core driver for low level functionality of the ConnectX-4 and
 	  Connect-IB cards by Mellanox Technologies.
 
+config MLX5_ACCEL
+	bool
+
 config MLX5_FPGA
         bool "Mellanox Technologies Innova support"
         depends on MLX5_CORE
+	select MLX5_ACCEL
         ---help---
           Build support for the Innova family of network cards by Mellanox
           Technologies. Innova network cards are comprised of a ConnectX chip
@@ -48,3 +52,15 @@ config MLX5_CORE_IPOIB
 	default n
 	---help---
 	  MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+	bool "IPSec XFRM cryptography-offload accelaration"
+	depends on MLX5_ACCEL
+	depends on MLX5_CORE_EN
+	depends on XFRM_OFFLOAD
+	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+	default n
+	---help---
+	  Build support for IPsec cryptography-offload accelaration in the NIC.
+	  Note: Support for hardware with this capability needs to be selected
+	  for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5ad093a21a6e..ca367445f864 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,9 +4,12 @@ subdir-ccflags-y += -I$(src)
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o lag.o dev.o
+		fs_counters.o rl.o lag.o dev.o lib/gid.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
+		fpga/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
@@ -16,3 +19,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+		en_accel/ipsec_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
new file mode 100644
index 000000000000..53e69edaedde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/ipsec.h"
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				   struct mlx5_accel_ipsec_sa *cmd)
+{
+	if (!MLX5_IPSEC_DEV(mdev))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
+}
+
+int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
+{
+	return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
+}
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	return mlx5_fpga_ipsec_device_caps(mdev);
+}
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	return mlx5_fpga_ipsec_counters_count(mdev);
+}
+
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				   unsigned int count)
+{
+	return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
+}
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	return mlx5_fpga_ipsec_init(mdev);
+}
+
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+	mlx5_fpga_ipsec_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
new file mode 100644
index 000000000000..d6e20fea9554
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_IPSEC_H__
+#define __MLX5_ACCEL_IPSEC_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/mlx5/driver.h>
+
+enum {
+	MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
+	MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
+	MLX5_ACCEL_IPSEC_ESP = BIT(3),
+	MLX5_ACCEL_IPSEC_LSO = BIT(4),
+};
+
+#define MLX5_IPSEC_SADB_IP_AH       BIT(7)
+#define MLX5_IPSEC_SADB_IP_ESP      BIT(6)
+#define MLX5_IPSEC_SADB_SA_VALID    BIT(5)
+#define MLX5_IPSEC_SADB_SPI_EN      BIT(4)
+#define MLX5_IPSEC_SADB_DIR_SX      BIT(3)
+#define MLX5_IPSEC_SADB_IPV6        BIT(2)
+
+enum {
+	MLX5_IPSEC_CMD_ADD_SA = 0,
+	MLX5_IPSEC_CMD_DEL_SA = 1,
+};
+
+enum mlx5_accel_ipsec_enc_mode {
+	MLX5_IPSEC_SADB_MODE_NONE = 0,
+	MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
+	MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
+			      MLX5_ACCEL_IPSEC_DEVICE)
+
+struct mlx5_accel_ipsec_sa {
+	__be32 cmd;
+	u8 key_enc[32];
+	u8 key_auth[32];
+	__be32 sip[4];
+	__be32 dip[4];
+	union {
+		struct {
+			__be32 reserved;
+			u8 salt_iv[8];
+			__be32 salt;
+		} __packed gcm;
+		struct {
+			u8 salt[16];
+		} __packed cbc;
+	};
+	__be32 spi;
+	__be32 sw_sa_handle;
+	__be16 tfclen;
+	u8 enc_mode;
+	u8 sip_masklen;
+	u8 dip_masklen;
+	u8 flags;
+	u8 reserved[2];
+} __packed;
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
+ * @mdev: mlx5 device
+ * @cmd: command to execute
+ * May be called from atomic context. Returns context pointer, or error
+ * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
+ * context, to cleanup the context pointer
+ */
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				   struct mlx5_accel_ipsec_sa *cmd);
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
+ * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
+ * Sleeps (killable) until command execution is complete.
+ * Returns the command result, or -EINTR if killed
+ */
+int mlx5_accel_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				   unsigned int count);
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+#define MLX5_IPSEC_DEV(mdev) false
+
+static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif	/* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 4d5bd01f1ebb..f5a2c605749f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -307,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
 	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
 	case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_FPGA_DESTROY_QP:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
 	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_FPGA_CREATE_QP:
+	case MLX5_CMD_OP_FPGA_MODIFY_QP:
+	case MLX5_CMD_OP_FPGA_QUERY_QP:
+	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -585,6 +590,11 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
 	MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+	MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+	MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+	MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index eef0a50e2388..e1b7ddfecd01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -328,6 +328,7 @@ struct mlx5e_sq_dma {
 
 enum {
 	MLX5E_SQ_STATE_ENABLED,
+	MLX5E_SQ_STATE_IPSEC,
 };
 
 struct mlx5e_sq_wqe_info {
@@ -784,6 +785,9 @@ struct mlx5e_priv {
 
 	const struct mlx5e_profile *profile;
 	void                      *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+	struct mlx5e_ipsec        *ipsec;
+#endif
 };
 
 struct mlx5e_profile {
@@ -833,7 +837,6 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
 
 void mlx5e_rx_am(struct mlx5e_rq *rq);
 void mlx5e_rx_am_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000000..bac5103efad3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+
+struct mlx5e_ipsec_sa_entry {
+	struct hlist_node hlist; /* Item in SADB_RX hashtable */
+	unsigned int handle; /* Handle in SADB_RX */
+	struct xfrm_state *x;
+	struct mlx5e_ipsec *ipsec;
+	void *context;
+};
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+					      unsigned int handle)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct xfrm_state *ret = NULL;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle)
+		if (sa_entry->handle == handle) {
+			ret = sa_entry->x;
+			xfrm_state_hold(ret);
+			break;
+		}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto out;
+
+	sa_entry->handle = ret;
+	hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+	ret = 0;
+
+out:
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+	return ret;
+}
+
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	hash_del_rcu(&sa_entry->hlist);
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+
+	/* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
+	synchronize_rcu();
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+{
+	unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
+
+	switch (key_len) {
+	case 16:
+		return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
+	case 32:
+		return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
+	default:
+		netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
+			    key_len, x->aead->alg_name);
+		return -1;
+	}
+}
+
+static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
+				    struct mlx5_accel_ipsec_sa *hw_sa)
+{
+	struct xfrm_state *x = sa_entry->x;
+	struct aead_geniv_ctx *geniv_ctx;
+	unsigned int crypto_data_len;
+	struct crypto_aead *aead;
+	unsigned int key_len;
+	int ivsize;
+
+	memset(hw_sa, 0, sizeof(*hw_sa));
+
+	if (op == MLX5_IPSEC_CMD_ADD_SA) {
+		crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+		key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+		aead = x->data;
+		geniv_ctx = crypto_aead_ctx(aead);
+		ivsize = crypto_aead_ivsize(aead);
+
+		memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
+		/* Duplicate 128 bit key twice according to HW layout */
+		if (key_len == 16)
+			memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
+		memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
+		hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
+	}
+
+	hw_sa->cmd = htonl(op);
+	hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
+	if (x->props.family == AF_INET) {
+		hw_sa->sip[3] = x->props.saddr.a4;
+		hw_sa->dip[3] = x->id.daddr.a4;
+		hw_sa->sip_masklen = 32;
+		hw_sa->dip_masklen = 32;
+	} else {
+		memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
+		memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
+		hw_sa->sip_masklen = 128;
+		hw_sa->dip_masklen = 128;
+		hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
+	}
+	hw_sa->spi = x->id.spi;
+	hw_sa->sw_sa_handle = htonl(sa_entry->handle);
+	switch (x->id.proto) {
+	case IPPROTO_ESP:
+		hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
+		break;
+	case IPPROTO_AH:
+		hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
+		break;
+	default:
+		break;
+	}
+	hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
+	if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
+		hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+}
+
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+	struct net_device *netdev = x->xso.dev;
+	struct mlx5e_priv *priv;
+
+	priv = netdev_priv(netdev);
+
+	if (x->props.aalgo != SADB_AALG_NONE) {
+		netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+		netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.calgo != SADB_X_CALG_NONE) {
+		netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.flags & XFRM_STATE_ESN) {
+		netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.family != AF_INET &&
+	    x->props.family != AF_INET6) {
+		netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_TRANSPORT &&
+	    x->props.mode != XFRM_MODE_TUNNEL) {
+		dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->id.proto != IPPROTO_ESP) {
+		netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->encap) {
+		netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+		return -EINVAL;
+	}
+	if (!x->aead) {
+		netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+		return -EINVAL;
+	}
+	if (x->aead->alg_icv_len != 128) {
+		netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+		return -EINVAL;
+	}
+	if ((x->aead->alg_key_len != 128 + 32) &&
+	    (x->aead->alg_key_len != 256 + 32)) {
+		netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EINVAL;
+	}
+	if (x->tfcpad) {
+		netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+		return -EINVAL;
+	}
+	if (!x->geniv) {
+		netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+		return -EINVAL;
+	}
+	if (strcmp(x->geniv, "seqiv")) {
+		netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+		return -EINVAL;
+	}
+	if (x->props.family == AF_INET6 &&
+	    !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+		netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
+	struct net_device *netdev = x->xso.dev;
+	struct mlx5_accel_ipsec_sa hw_sa;
+	struct mlx5e_priv *priv;
+	void *context;
+	int err;
+
+	priv = netdev_priv(netdev);
+
+	err = mlx5e_xfrm_validate_state(x);
+	if (err)
+		return err;
+
+	sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+	if (!sa_entry) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	sa_entry->x = x;
+	sa_entry->ipsec = priv->ipsec;
+
+	/* Add the SA to handle processed incoming packets before the add SA
+	 * completion was received
+	 */
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+		if (err) {
+			netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
+			goto err_entry;
+		}
+	}
+
+	mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
+	context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
+	if (IS_ERR(context)) {
+		err = PTR_ERR(context);
+		goto err_sadb_rx;
+	}
+
+	err = mlx5_accel_ipsec_sa_cmd_wait(context);
+	if (err)
+		goto err_sadb_rx;
+
+	x->xso.offload_handle = (unsigned long)sa_entry;
+	goto out;
+
+err_sadb_rx:
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		mlx5e_ipsec_sadb_rx_del(sa_entry);
+		mlx5e_ipsec_sadb_rx_free(sa_entry);
+	}
+err_entry:
+	kfree(sa_entry);
+out:
+	return err;
+}
+
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct mlx5_accel_ipsec_sa hw_sa;
+	void *context;
+
+	if (!x->xso.offload_handle)
+		return;
+
+	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+	WARN_ON(sa_entry->x != x);
+
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+		mlx5e_ipsec_sadb_rx_del(sa_entry);
+
+	mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
+	context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
+	if (IS_ERR(context))
+		return;
+
+	sa_entry->context = context;
+}
+
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry;
+	int res;
+
+	if (!x->xso.offload_handle)
+		return;
+
+	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+	WARN_ON(sa_entry->x != x);
+
+	res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
+	sa_entry->context = NULL;
+	if (res) {
+		/* Leftover object will leak */
+		return;
+	}
+
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+		mlx5e_ipsec_sadb_rx_free(sa_entry);
+
+	kfree(sa_entry);
+}
+
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ipsec *ipsec = NULL;
+
+	if (!MLX5_IPSEC_DEV(priv->mdev)) {
+		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+		return 0;
+	}
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		return -ENOMEM;
+
+	hash_init(ipsec->sadb_rx);
+	spin_lock_init(&ipsec->sadb_rx_lock);
+	ida_init(&ipsec->halloc);
+	ipsec->en_priv = priv;
+	ipsec->en_priv->ipsec = ipsec;
+	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+	return 0;
+}
+
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+	if (!ipsec)
+		return;
+
+	ida_destroy(&ipsec->halloc);
+	kfree(ipsec);
+	priv->ipsec = NULL;
+}
+
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+	if (x->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl > 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not support yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
+
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
+	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
+	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
+	.xdo_dev_offload_ok	= mlx5e_ipsec_offload_ok,
+};
+
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct net_device *netdev = priv->netdev;
+
+	if (!priv->ipsec)
+		return;
+
+	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+	    !MLX5_CAP_ETH(mdev, swp)) {
+		mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+		return;
+	}
+
+	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+	netdev->features |= NETIF_F_HW_ESP;
+	netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+		return;
+	}
+
+	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+	    !MLX5_CAP_ETH(mdev, swp_lso)) {
+		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+		return;
+	}
+
+	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+	netdev->features |= NETIF_F_GSO_ESP;
+	netdev->hw_features |= NETIF_F_GSO_ESP;
+	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
new file mode 100644
index 000000000000..56e00baf16cc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
+struct mlx5e_priv;
+
+struct mlx5e_ipsec_sw_stats {
+	atomic64_t ipsec_rx_drop_sp_alloc;
+	atomic64_t ipsec_rx_drop_sadb_miss;
+	atomic64_t ipsec_rx_drop_syndrome;
+	atomic64_t ipsec_tx_drop_bundle;
+	atomic64_t ipsec_tx_drop_no_state;
+	atomic64_t ipsec_tx_drop_not_ip;
+	atomic64_t ipsec_tx_drop_trailer;
+	atomic64_t ipsec_tx_drop_metadata;
+};
+
+struct mlx5e_ipsec_stats {
+	u64 ipsec_dec_in_packets;
+	u64 ipsec_dec_out_packets;
+	u64 ipsec_dec_bypass_packets;
+	u64 ipsec_enc_in_packets;
+	u64 ipsec_enc_out_packets;
+	u64 ipsec_enc_bypass_packets;
+	u64 ipsec_dec_drop_packets;
+	u64 ipsec_dec_auth_fail_packets;
+	u64 ipsec_enc_drop_packets;
+	u64 ipsec_add_sa_success;
+	u64 ipsec_add_sa_fail;
+	u64 ipsec_del_sa_success;
+	u64 ipsec_del_sa_fail;
+	u64 ipsec_cmd_drop;
+};
+
+struct mlx5e_ipsec {
+	struct mlx5e_priv *en_priv;
+	DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+	spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
+	struct ida halloc;
+	struct mlx5e_ipsec_sw_stats sw_stats;
+	struct mlx5e_ipsec_stats stats;
+};
+
+void mlx5e_ipsec_build_inverse_table(void);
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+					      unsigned int handle);
+
+#else
+
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+	return 0;
+}
+
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+	return 0;
+}
+
+static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
+					  uint8_t *data)
+{
+	return 0;
+}
+
+static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+	return 0;
+}
+
+#endif
+
+#endif	/* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
new file mode 100644
index 000000000000..4a78aefdf157
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/aead.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ipsec.h"
+#include "en.h"
+
+enum {
+	MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
+	MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+};
+
+struct mlx5e_ipsec_rx_metadata {
+	unsigned char   reserved;
+	__be32		sa_handle;
+} __packed;
+
+enum {
+	MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+	MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+struct mlx5e_ipsec_tx_metadata {
+	__be16 mss_inv;         /* 1/MSS in 16bit fixed point, only for LSO */
+	__be16 seq;             /* LSBs of the first TCP seq, only for LSO */
+	u8     esp_next_proto;  /* Next protocol of ESP */
+} __packed;
+
+struct mlx5e_ipsec_metadata {
+	unsigned char syndrome;
+	union {
+		unsigned char raw[5];
+		/* from FPGA to host, on successful decrypt */
+		struct mlx5e_ipsec_rx_metadata rx;
+		/* from host to FPGA */
+		struct mlx5e_ipsec_tx_metadata tx;
+	} __packed content;
+	/* packet type ID field	*/
+	__be16 ethertype;
+} __packed;
+
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+	return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
+}
+
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+	struct mlx5e_ipsec_metadata *mdata;
+	struct ethhdr *eth;
+
+	if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
+		return ERR_PTR(-ENOMEM);
+
+	eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
+	skb->mac_header -= sizeof(*mdata);
+	mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+
+	memmove(skb->data, skb->data + sizeof(*mdata),
+		2 * ETH_ALEN);
+
+	eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+	memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
+	return mdata;
+}
+
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+	unsigned int alen = crypto_aead_authsize(x->data);
+	struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+	struct iphdr *ipv4hdr = ip_hdr(skb);
+	unsigned int trailer_len;
+	u8 plen;
+	int ret;
+
+	ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+	if (unlikely(ret))
+		return ret;
+
+	trailer_len = alen + plen + 2;
+
+	pskb_trim(skb, skb->len - trailer_len);
+	if (skb->protocol == htons(ETH_P_IP)) {
+		ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+		ip_send_check(ipv4hdr);
+	} else {
+		ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+					     trailer_len);
+	}
+	return 0;
+}
+
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+				struct mlx5_wqe_eth_seg *eseg, u8 mode,
+				struct xfrm_offload *xo)
+{
+	u8 proto;
+
+	/* Tunnel Mode:
+	 * SWP:      OutL3       InL3  InL4
+	 * Pkt: MAC  IP     ESP  IP    L4
+	 *
+	 * Transport Mode:
+	 * SWP:      OutL3       InL4
+	 *           InL3
+	 * Pkt: MAC  IP     ESP  L4
+	 *
+	 * Offsets are in 2-byte words, counting from start of frame
+	 */
+	eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+	if (skb->protocol == htons(ETH_P_IPV6))
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+	if (mode == XFRM_MODE_TUNNEL) {
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		if (xo->proto == IPPROTO_IPV6) {
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+			proto = inner_ipv6_hdr(skb)->nexthdr;
+		} else {
+			proto = inner_ip_hdr(skb)->protocol;
+		}
+	} else {
+		eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+		if (skb->protocol == htons(ETH_P_IPV6))
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		proto = xo->proto;
+	}
+	switch (proto) {
+	case IPPROTO_UDP:
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+		/* Fall through */
+	case IPPROTO_TCP:
+		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+		break;
+	}
+}
+
+static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+{
+	int iv_offset;
+	__be64 seqno;
+
+	/* Place the SN in the IV field */
+	seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+	iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+	skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+				     struct mlx5e_ipsec_metadata *mdata,
+				     struct xfrm_offload *xo)
+{
+	struct ip_esp_hdr *esph;
+	struct tcphdr *tcph;
+
+	if (skb_is_gso(skb)) {
+		/* Add LSO metadata indication */
+		esph = ip_esp_hdr(skb);
+		tcph = inner_tcp_hdr(skb);
+		netdev_dbg(skb->dev, "   Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+			   skb->network_header,
+			   skb->transport_header,
+			   skb->inner_network_header,
+			   skb->inner_transport_header);
+		netdev_dbg(skb->dev, "   Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+			   skb->len, skb_shinfo(skb)->gso_size,
+			   ntohs(tcph->source), ntohs(tcph->dest),
+			   ntohl(tcph->seq), ntohl(esph->seq_no));
+		mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+		mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+		mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+	} else {
+		mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+	}
+	mdata->content.tx.esp_next_proto = xo->proto;
+
+	netdev_dbg(skb->dev, "   TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+		   mdata->syndrome, mdata->content.tx.esp_next_proto,
+		   ntohs(mdata->content.tx.mss_inv),
+		   ntohs(mdata->content.tx.seq));
+}
+
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+					  struct mlx5e_tx_wqe *wqe,
+					  struct sk_buff *skb)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct mlx5e_ipsec_metadata *mdata;
+	struct xfrm_state *x;
+
+	if (!xo)
+		return skb;
+
+	if (unlikely(skb->sp->len != 1)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+		goto drop;
+	}
+
+	x = xfrm_input_state(skb);
+	if (unlikely(!x)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+		goto drop;
+	}
+
+	if (unlikely(!x->xso.offload_handle ||
+		     (skb->protocol != htons(ETH_P_IP) &&
+		      skb->protocol != htons(ETH_P_IPV6)))) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+		goto drop;
+	}
+
+	if (!skb_is_gso(skb))
+		if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+			atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+			goto drop;
+		}
+	mdata = mlx5e_ipsec_add_metadata(skb);
+	if (unlikely(IS_ERR(mdata))) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+		goto drop;
+	}
+	mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+	mlx5e_ipsec_set_iv(skb, xo);
+	mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+	return skb;
+
+drop:
+	kfree_skb(skb);
+	return NULL;
+}
+
+static inline struct xfrm_state *
+mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
+		     struct mlx5e_ipsec_metadata *mdata)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct xfrm_offload *xo;
+	struct xfrm_state *xs;
+	u32 sa_handle;
+
+	skb->sp = secpath_dup(skb->sp);
+	if (unlikely(!skb->sp)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
+		return NULL;
+	}
+
+	sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
+	xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
+	if (unlikely(!xs)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+		return NULL;
+	}
+
+	skb->sp->xvec[skb->sp->len++] = xs;
+	skb->sp->olen++;
+
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+	switch (mdata->syndrome) {
+	case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
+		xo->status = CRYPTO_SUCCESS;
+		break;
+	case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
+		xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
+		break;
+	default:
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
+		return NULL;
+	}
+	return xs;
+}
+
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+					  struct sk_buff *skb)
+{
+	struct mlx5e_ipsec_metadata *mdata;
+	struct ethhdr *old_eth;
+	struct ethhdr *new_eth;
+	struct xfrm_state *xs;
+	__be16 *ethtype;
+
+	/* Detect inline metadata */
+	if (skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)
+		return skb;
+	ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
+	if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+		return skb;
+
+	/* Use the metadata */
+	mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
+	xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
+	if (unlikely(!xs)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	/* Remove the metadata from the buffer */
+	old_eth = (struct ethhdr *)skb->data;
+	new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
+	memmove(new_eth, old_eth, 2 * ETH_ALEN);
+	/* Ethertype is already in its new place */
+	skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+
+	return skb;
+}
+
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+			       netdev_features_t features)
+{
+	struct xfrm_state *x;
+
+	if (skb->sp && skb->sp->len) {
+		x = skb->sp->xvec[0];
+		if (x && x->xso.offload_handle)
+			return true;
+	}
+	return false;
+}
+
+void mlx5e_ipsec_build_inverse_table(void)
+{
+	u16 mss_inv;
+	u32 mss;
+
+	/* Calculate 1/x inverse table for use in GSO data path.
+	 * Using this table, we provide the IPSec accelerator with the value of
+	 * 1/gso_size so that it can infer the position of each segment inside
+	 * the GSO, and increment the ESP sequence number, and generate the IV.
+	 * The HW needs this value in Q0.16 fixed-point number format
+	 */
+	mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
+	for (mss = 2; mss < MAX_LSO_MSS; mss++) {
+		mss_inv = ((1ULL << 32) / mss) >> 16;
+		mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
new file mode 100644
index 000000000000..e37ae2598dbb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_RXTX_H__
+#define __MLX5E_IPSEC_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+					  struct sk_buff *skb);
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+void mlx5e_ipsec_inverse_table_init(void);
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+			       netdev_features_t features);
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+					  struct mlx5e_tx_wqe *wqe,
+					  struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
new file mode 100644
index 000000000000..6fea59223dc4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "fpga/sdk.h"
+#include "en_accel/ipsec.h"
+
+static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
+};
+
+static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+	atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
+#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
+
+#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+	if (!priv->ipsec)
+		return 0;
+
+	return NUM_IPSEC_COUNTERS;
+}
+
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+	unsigned int i, idx = 0;
+
+	if (!priv->ipsec)
+		return 0;
+
+	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       mlx5e_ipsec_hw_stats_desc[i].format);
+
+	for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       mlx5e_ipsec_sw_stats_desc[i].format);
+
+	return NUM_IPSEC_COUNTERS;
+}
+
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+	int ret;
+
+	if (!priv->ipsec)
+		return;
+
+	ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
+					     NUM_IPSEC_HW_COUNTERS);
+	if (ret)
+		memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
+}
+
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+	int i, idx = 0;
+
+	if (!priv->ipsec)
+		return 0;
+
+	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
+						   mlx5e_ipsec_hw_stats_desc, i);
+
+	for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+						      mlx5e_ipsec_sw_stats_desc, i);
+
+	return NUM_IPSEC_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 16b1e96a7050..917fade5f5d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -31,6 +31,7 @@
  */
 
 #include "en.h"
+#include "en_accel/ipsec.h"
 
 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
 			       struct ethtool_drvinfo *drvinfo)
@@ -186,7 +187,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
 		       MLX5E_NUM_SQ_STATS(priv) +
 		       MLX5E_NUM_PFC_COUNTERS(priv) +
 		       ARRAY_SIZE(mlx5e_pme_status_desc) +
-		       ARRAY_SIZE(mlx5e_pme_error_desc);
+		       ARRAY_SIZE(mlx5e_pme_error_desc) +
+		       mlx5e_ipsec_get_count(priv);
 
 	case ETH_SS_PRIV_FLAGS:
 		return ARRAY_SIZE(mlx5e_priv_flags);
@@ -275,6 +277,9 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
 
+	/* IPSec counters */
+	idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return;
 
@@ -403,6 +408,9 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
 		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
 						   mlx5e_pme_error_desc, i);
 
+	/* IPSec counters */
+	idx += mlx5e_ipsec_get_stats(priv, data + idx);
+
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9f99f624004f..1eac5003084f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,9 @@
 #include "en.h"
 #include "en_tc.h"
 #include "en_rep.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "accel/ipsec.h"
 #include "vxlan.h"
 
 struct mlx5e_rq_param {
@@ -115,7 +118,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
 	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-		    !params->xdp_prog ?
+		    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
 		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 		    MLX5_WQ_TYPE_LINKED_LIST;
 	mlx5e_set_rq_type_params(mdev, params, rq_type);
@@ -328,8 +331,10 @@ static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
 
 void mlx5e_update_stats(struct mlx5e_priv *priv, bool full)
 {
-	if (full)
+	if (full) {
 		mlx5e_update_pcie_counters(priv);
+		mlx5e_ipsec_update_stats(priv);
+	}
 	mlx5e_update_pport_counters(priv, full);
 	mlx5e_update_vport_counters(priv);
 	mlx5e_update_q_counter(priv);
@@ -592,6 +597,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
 		rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+		if (MLX5_IPSEC_DEV(mdev)) {
+			err = -EINVAL;
+			netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
+			goto err_rq_wq_destroy;
+		}
+#endif
 		if (!rq->handle_rx_cqe) {
 			err = -EINVAL;
 			netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
@@ -624,7 +636,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->alloc_wqe = mlx5e_alloc_rx_wqe;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
-		rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+		if (c->priv->ipsec)
+			rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
+		else
+#endif
+			rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
 		if (!rq->handle_rx_cqe) {
 			kfree(rq->wqe.frag_info);
 			err = -EINVAL;
@@ -635,6 +652,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->buff.wqe_sz = params->lro_en  ?
 				params->lro_wqe_sz :
 				MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
+#ifdef CONFIG_MLX5_EN_IPSEC
+		if (MLX5_IPSEC_DEV(mdev))
+			rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN;
+#endif
 		rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en;
 		byte_count = rq->buff.wqe_sz;
 
@@ -1095,6 +1116,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->max_inline      = params->tx_max_inline;
 	sq->min_inline_mode = params->tx_min_inline_mode;
+	if (MLX5_IPSEC_DEV(c->priv->mdev))
+		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -1914,6 +1937,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
 
 	mlx5e_build_sq_param_common(priv, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+	MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -3070,8 +3094,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 		PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
 	stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
 	stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
-	stats->tx_carrier_errors =
-		PPORT_802_3_GET(pstats, a_symbol_error_during_carrier);
 	stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
 			   stats->rx_frame_errors;
 	stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
@@ -3508,6 +3530,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 	features = vlan_features_check(skb, features);
 	features = vxlan_features_check(skb, features);
 
+#ifdef CONFIG_MLX5_EN_IPSEC
+	if (mlx5e_ipsec_feature_check(skb, netdev, features))
+		return features;
+#endif
+
 	/* Validate if the tunneled packet is being offloaded by HW */
 	if (skb->encapsulation &&
 	    (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
@@ -3555,6 +3582,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 		goto unlock;
 	}
 
+	if ((netdev->features & NETIF_F_HW_ESP) && prog) {
+		netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+		err = -EINVAL;
+		goto unlock;
+	}
+
 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);
@@ -4046,6 +4079,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	if (MLX5_CAP_GEN(mdev, vport_group_manager))
 		netdev->switchdev_ops = &mlx5e_switchdev_ops;
 #endif
+
+	mlx5e_ipsec_build_netdev(priv);
 }
 
 static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
@@ -4074,14 +4109,19 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 			   void *ppriv)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	int err;
 
 	mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+	err = mlx5e_ipsec_init(priv);
+	if (err)
+		mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
 	mlx5e_build_nic_netdev(netdev);
 	mlx5e_vxlan_init(priv);
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
+	mlx5e_ipsec_cleanup(priv);
 	mlx5e_vxlan_cleanup(priv);
 
 	if (priv->channels.params.xdp_prog)
@@ -4473,6 +4513,7 @@ static struct mlx5_interface mlx5e_interface = {
 
 void mlx5e_init(void)
 {
+	mlx5e_ipsec_build_inverse_table();
 	mlx5e_build_ptys2ethtool_map();
 	mlx5_register_interface(&mlx5e_interface);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5f3c138c948d..325b2c8c1c6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -41,6 +41,7 @@
 #include "eswitch.h"
 #include "en_rep.h"
 #include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -996,7 +997,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
 	for (; work_done < budget; work_done++) {
-		struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);
+		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq);
 
 		if (!cqe)
 			break;
@@ -1050,7 +1051,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 		u16 wqe_counter;
 		bool last_wqe;
 
-		cqe = mlx5e_get_cqe(cq);
+		cqe = mlx5_cqwq_get_cqe(&cq->wq);
 		if (!cqe)
 			break;
 
@@ -1183,3 +1184,43 @@ wq_free_wqe:
 }
 
 #endif /* CONFIG_MLX5_CORE_IPOIB */
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+	struct mlx5e_wqe_frag_info *wi;
+	struct mlx5e_rx_wqe *wqe;
+	__be16 wqe_counter_be;
+	struct sk_buff *skb;
+	u16 wqe_counter;
+	u32 cqe_bcnt;
+
+	wqe_counter_be = cqe->wqe_counter;
+	wqe_counter    = be16_to_cpu(wqe_counter_be);
+	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+	wi             = &rq->wqe.frag_info[wqe_counter];
+	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+
+	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+	if (unlikely(!skb)) {
+		/* a DROP, save the page-reuse checks */
+		mlx5e_free_rx_wqe(rq, wi);
+		goto wq_ll_pop;
+	}
+	skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb);
+	if (unlikely(!skb)) {
+		mlx5e_free_rx_wqe(rq, wi);
+		goto wq_ll_pop;
+	}
+
+	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+	napi_gro_receive(rq->cq.napi, skb);
+
+	mlx5e_free_rx_wqe_reuse(rq, wi);
+wq_ll_pop:
+	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
+		       &wqe->next.next_wqe_index);
+}
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 0433d69429f3..aaa0f4ebba9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -34,6 +34,7 @@
 #include <linux/if_vlan.h>
 #include "en.h"
 #include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
 
 #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -299,12 +300,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 }
 
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+				 struct mlx5e_tx_wqe *wqe, u16 pi)
 {
-	struct mlx5_wq_cyc       *wq   = &sq->wq;
-
-	u16 pi = sq->pc & wq->sz_m1;
-	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
 	struct mlx5e_tx_wqe_info *wi   = &sq->db.wqe_info[pi];
 
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
@@ -319,8 +317,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
 	u16 ds_cnt;
 	u16 ihs;
 
-	memset(wqe, 0, sizeof(*wqe));
-
 	mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
 
 	if (skb_is_gso(skb)) {
@@ -375,8 +371,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi = sq->pc & wq->sz_m1;
+	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+	if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
+		skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
+		if (unlikely(!skb))
+			return NETDEV_TX_OK;
+	}
+#endif
 
-	return mlx5e_sq_xmit(sq, skb);
+	return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
@@ -409,7 +418,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 		u16 wqe_counter;
 		bool last_wqe;
 
-		cqe = mlx5e_get_cqe(cq);
+		cqe = mlx5_cqwq_get_cqe(&cq->wq);
 		if (!cqe)
 			break;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 5ca6714e3e02..92db28a9ed43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -32,23 +32,6 @@
 
 #include "en.h"
 
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
-{
-	struct mlx5_cqwq *wq = &cq->wq;
-	u32 ci = mlx5_cqwq_get_ci(wq);
-	struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
-	u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
-	u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
-
-	if (cqe_ownership_bit != sw_ownership_val)
-		return NULL;
-
-	/* ensure cqe content is read after cqe ownership bit */
-	dma_rmb();
-
-	return cqe;
-}
-
 static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
 					     struct mlx5e_icosq *sq,
 					     struct mlx5_cqe64 *cqe,
@@ -89,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
 		return;
 
-	cqe = mlx5e_get_cqe(cq);
+	cqe = mlx5_cqwq_get_cqe(&cq->wq);
 	if (likely(!cqe))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 99cba644b4fc..e37453d838db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -33,10 +33,44 @@
 #include <linux/etherdevice.h>
 #include <linux/mlx5/cmd.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
 
 #include "mlx5_core.h"
 #include "fpga/cmd.h"
 
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+				 MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+			 void *buf, bool write)
+{
+	u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+	u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+	int err;
+
+	if (size & 3)
+		return -EINVAL;
+	if (addr & 3)
+		return -EINVAL;
+	if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+		return -EINVAL;
+
+	MLX5_SET(fpga_access_reg, in, size, size);
+	MLX5_SET64(fpga_access_reg, in, address, addr);
+	if (write)
+		memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_FPGA_ACCESS_REG, 0, write);
+	if (err)
+		return err;
+
+	if (!write)
+		memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size);
+
+	return 0;
+}
+
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
 {
 	u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
@@ -46,6 +80,49 @@ int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
 				    MLX5_REG_FPGA_CAP, 0, 0);
 }
 
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+	MLX5_SET(fpga_ctrl, in, operation, op);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				    MLX5_REG_FPGA_CTRL, 0, true);
+}
+
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+	unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+	u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+	unsigned int read;
+	int ret = 0;
+
+	if (cap_size > size) {
+		mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u",
+			       size, cap_size);
+		return -EINVAL;
+	}
+
+	while (cap_size > 0) {
+		read = min_t(unsigned int, cap_size,
+			     MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+		ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+		if (ret) {
+			mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d",
+				       read, addr, ret);
+			return ret;
+		}
+
+		cap_size -= read;
+		addr += read;
+		caps += read;
+	}
+
+	return ret;
+}
+
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 {
 	u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
@@ -62,3 +139,100 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 	query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
 	return 0;
 }
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+			u32 *fpga_qpn)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+	int ret;
+
+	MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+	memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
+	       MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc),
+	       MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+	*fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+	return ret;
+}
+
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+			enum mlx5_fpga_qpc_field_select fields,
+			void *fpga_qpc)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+
+	MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+	MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+	MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+	memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+	       MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+		       u32 fpga_qpn, void *fpga_qpc)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+	int ret;
+
+	MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+	MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc),
+	       MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+	return ret;
+}
+
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+
+	MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
+	MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+				bool clear, struct mlx5_fpga_qp_counters *data)
+{
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+	int ret;
+
+	MLX5_SET(fpga_query_qp_counters_in, in, opcode,
+		 MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+	MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
+	MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					  rx_ack_packets);
+	data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					   rx_send_packets);
+	data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					  tx_ack_packets);
+	data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+					   tx_send_packets);
+	data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out,
+					 rx_total_drop);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index a74396a61bc3..94bdfd47c3f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -53,7 +53,32 @@ struct mlx5_fpga_query {
 	enum mlx5_fpga_status status;
 };
 
+enum mlx5_fpga_qpc_field_select {
+	MLX5_FPGA_QPC_STATE = BIT(0),
+};
+
+struct mlx5_fpga_qp_counters {
+	u64 rx_ack_packets;
+	u64 rx_send_packets;
+	u64 tx_ack_packets;
+	u64 tx_send_packets;
+	u64 rx_total_drop;
+};
+
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+			 void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+			u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+			enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+				bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
 
 #endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
new file mode 100644
index 000000000000..c4392f741c5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/addrconf.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/vport.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0xFFFF
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+				  struct mlx5_fpga_dma_buf *buf)
+{
+	struct device *dma_device;
+	int err = 0;
+
+	if (unlikely(!buf->sg[0].data))
+		goto out;
+
+	dma_device = &conn->fdev->mdev->pdev->dev;
+	buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
+					     buf->sg[0].size, buf->dma_dir);
+	err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
+	if (unlikely(err)) {
+		mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (!buf->sg[1].data)
+		goto out;
+
+	buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data,
+					     buf->sg[1].size, buf->dma_dir);
+	err = dma_mapping_error(dma_device, buf->sg[1].dma_addr);
+	if (unlikely(err)) {
+		mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err);
+		dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+				 buf->sg[0].size, buf->dma_dir);
+		err = -ENOMEM;
+	}
+
+out:
+	return err;
+}
+
+static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
+				     struct mlx5_fpga_dma_buf *buf)
+{
+	struct device *dma_device;
+
+	dma_device = &conn->fdev->mdev->pdev->dev;
+	if (buf->sg[1].data)
+		dma_unmap_single(dma_device, buf->sg[1].dma_addr,
+				 buf->sg[1].size, buf->dma_dir);
+
+	if (likely(buf->sg[0].data))
+		dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+				 buf->sg[0].size, buf->dma_dir);
+}
+
+static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
+				    struct mlx5_fpga_dma_buf *buf)
+{
+	struct mlx5_wqe_data_seg *data;
+	unsigned int ix;
+	int err = 0;
+
+	err = mlx5_fpga_conn_map_buf(conn, buf);
+	if (unlikely(err))
+		goto out;
+
+	if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) {
+		mlx5_fpga_conn_unmap_buf(conn, buf);
+		return -EBUSY;
+	}
+
+	ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
+	data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
+	data->byte_count = cpu_to_be32(buf->sg[0].size);
+	data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+	data->addr = cpu_to_be64(buf->sg[0].dma_addr);
+
+	conn->qp.rq.pc++;
+	conn->qp.rq.bufs[ix] = buf;
+
+	/* Make sure that descriptors are written before doorbell record. */
+	dma_wmb();
+	*conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff);
+out:
+	return err;
+}
+
+static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
+{
+	/* ensure wqe is visible to device before updating doorbell record */
+	dma_wmb();
+	*conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
+	/* Make sure that doorbell record is visible before ringing */
+	wmb();
+	mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+}
+
+static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
+				     struct mlx5_fpga_dma_buf *buf)
+{
+	struct mlx5_wqe_ctrl_seg *ctrl;
+	struct mlx5_wqe_data_seg *data;
+	unsigned int ix, sgi;
+	int size = 1;
+
+	ix = conn->qp.sq.pc & (conn->qp.sq.size - 1);
+
+	ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix);
+	data = (void *)(ctrl + 1);
+
+	for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) {
+		if (!buf->sg[sgi].data)
+			break;
+		data->byte_count = cpu_to_be32(buf->sg[sgi].size);
+		data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+		data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
+		data++;
+		size++;
+	}
+
+	ctrl->imm = 0;
+	ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+	ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
+					     MLX5_OPCODE_SEND);
+	ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8));
+
+	conn->qp.sq.pc++;
+	conn->qp.sq.bufs[ix] = buf;
+	mlx5_fpga_conn_notify_hw(conn, ctrl);
+}
+
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+			struct mlx5_fpga_dma_buf *buf)
+{
+	unsigned long flags;
+	int err;
+
+	if (!conn->qp.active)
+		return -ENOTCONN;
+
+	err = mlx5_fpga_conn_map_buf(conn, buf);
+	if (err)
+		return err;
+
+	spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+	if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) {
+		list_add_tail(&buf->list, &conn->qp.sq.backlog);
+		goto out_unlock;
+	}
+
+	mlx5_fpga_conn_post_send(conn, buf);
+
+out_unlock:
+	spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+	return err;
+}
+
+static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_dma_buf *buf;
+	int err;
+
+	buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->sg[0].data = (void *)(buf + 1);
+	buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+	buf->dma_dir = DMA_FROM_DEVICE;
+
+	err = mlx5_fpga_conn_post_recv(conn, buf);
+	if (err)
+		kfree(buf);
+
+	return err;
+}
+
+static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+				      struct mlx5_core_mkey *mkey)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	void *mkc;
+	u32 *in;
+	int err;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
+
+	MLX5_SET(mkc, mkc, pd, pdn);
+	MLX5_SET(mkc, mkc, length64, 1);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+	kvfree(in);
+	return err;
+}
+
+static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn,
+				  struct mlx5_cqe64 *cqe, u8 status)
+{
+	struct mlx5_fpga_dma_buf *buf;
+	int ix, err;
+
+	ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1);
+	buf = conn->qp.rq.bufs[ix];
+	conn->qp.rq.bufs[ix] = NULL;
+	if (!status)
+		buf->sg[0].size = be32_to_cpu(cqe->byte_cnt);
+	conn->qp.rq.cc++;
+
+	if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+		mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+			       buf, conn->fpga_qpn, status);
+	else
+		mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+			      buf, conn->fpga_qpn, status);
+
+	mlx5_fpga_conn_unmap_buf(conn, buf);
+
+	if (unlikely(status || !conn->qp.active)) {
+		conn->qp.active = false;
+		kfree(buf);
+		return;
+	}
+
+	mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n",
+		      buf->sg[0].size);
+	conn->recv_cb(conn->cb_arg, buf);
+
+	buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+	err = mlx5_fpga_conn_post_recv(conn, buf);
+	if (unlikely(err)) {
+		mlx5_fpga_warn(conn->fdev,
+			       "Failed to re-post recv buf: %d\n", err);
+		kfree(buf);
+	}
+}
+
+static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn,
+				  struct mlx5_cqe64 *cqe, u8 status)
+{
+	struct mlx5_fpga_dma_buf *buf, *nextbuf;
+	unsigned long flags;
+	int ix;
+
+	spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+	ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1);
+	buf = conn->qp.sq.bufs[ix];
+	conn->qp.sq.bufs[ix] = NULL;
+	conn->qp.sq.cc++;
+
+	/* Handle backlog still under the spinlock to ensure message post order */
+	if (unlikely(!list_empty(&conn->qp.sq.backlog))) {
+		if (likely(conn->qp.active)) {
+			nextbuf = list_first_entry(&conn->qp.sq.backlog,
+						   struct mlx5_fpga_dma_buf, list);
+			list_del(&nextbuf->list);
+			mlx5_fpga_conn_post_send(conn, nextbuf);
+		}
+	}
+
+	spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+
+	if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+		mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+			       buf, conn->fpga_qpn, status);
+	else
+		mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+			      buf, conn->fpga_qpn, status);
+
+	mlx5_fpga_conn_unmap_buf(conn, buf);
+
+	if (likely(buf->complete))
+		buf->complete(conn, conn->fdev, buf, status);
+
+	if (unlikely(status))
+		conn->qp.active = false;
+}
+
+static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
+				      struct mlx5_cqe64 *cqe)
+{
+	u8 opcode, status = 0;
+
+	opcode = cqe->op_own >> 4;
+
+	switch (opcode) {
+	case MLX5_CQE_REQ_ERR:
+		status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+		/* Fall through */
+	case MLX5_CQE_REQ:
+		mlx5_fpga_conn_sq_cqe(conn, cqe, status);
+		break;
+
+	case MLX5_CQE_RESP_ERR:
+		status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+		/* Fall through */
+	case MLX5_CQE_RESP_SEND:
+		mlx5_fpga_conn_rq_cqe(conn, cqe, status);
+		break;
+	default:
+		mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n",
+			       opcode);
+	}
+}
+
+static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
+{
+	mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT,
+		    conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
+}
+
+static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
+				    enum mlx5_event event)
+{
+	struct mlx5_fpga_conn *conn;
+
+	conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+	mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
+{
+	struct mlx5_fpga_conn *conn;
+
+	conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
+	mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
+				       unsigned int budget)
+{
+	struct mlx5_cqe64 *cqe;
+
+	while (budget) {
+		cqe = mlx5_cqwq_get_cqe(&conn->cq.wq);
+		if (!cqe)
+			break;
+
+		budget--;
+		mlx5_cqwq_pop(&conn->cq.wq);
+		mlx5_fpga_conn_handle_cqe(conn, cqe);
+		mlx5_cqwq_update_db_record(&conn->cq.wq);
+	}
+	if (!budget) {
+		tasklet_schedule(&conn->cq.tasklet);
+		return;
+	}
+
+	mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc);
+	/* ensure cq space is freed before enabling more cqes */
+	wmb();
+	mlx5_fpga_conn_arm_cq(conn);
+}
+
+static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+{
+	struct mlx5_fpga_conn *conn = (void *)data;
+
+	if (unlikely(!conn->qp.active))
+		return;
+	mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+{
+	struct mlx5_fpga_conn *conn;
+
+	conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+	if (unlikely(!conn->qp.active))
+		return;
+	mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+	struct mlx5_wq_param wqp;
+	struct mlx5_cqe64 *cqe;
+	int inlen, err, eqn;
+	unsigned int irqn;
+	void *cqc, *in;
+	__be64 *pas;
+	u32 i;
+
+	cq_size = roundup_pow_of_two(cq_size);
+	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
+
+	wqp.buf_numa_node = mdev->priv.numa_node;
+	wqp.db_numa_node  = mdev->priv.numa_node;
+
+	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq,
+			       &conn->cq.wq_ctrl);
+	if (err)
+		return err;
+
+	for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) {
+		cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i);
+		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+	}
+
+	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_cqwq;
+	}
+
+	err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+	if (err)
+		goto err_cqwq;
+
+	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+	MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
+	MLX5_SET(cqc, cqc, c_eqn, eqn);
+	MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
+	MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift -
+			   MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
+
+	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas);
+
+	err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+	kvfree(in);
+
+	if (err)
+		goto err_cqwq;
+
+	conn->cq.mcq.cqe_sz     = 64;
+	conn->cq.mcq.set_ci_db  = conn->cq.wq_ctrl.db.db;
+	conn->cq.mcq.arm_db     = conn->cq.wq_ctrl.db.db + 1;
+	*conn->cq.mcq.set_ci_db = 0;
+	*conn->cq.mcq.arm_db    = 0;
+	conn->cq.mcq.vector     = 0;
+	conn->cq.mcq.comp       = mlx5_fpga_conn_cq_complete;
+	conn->cq.mcq.event      = mlx5_fpga_conn_cq_event;
+	conn->cq.mcq.irqn       = irqn;
+	conn->cq.mcq.uar        = fdev->conn_res.uar;
+	tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
+		     (unsigned long)conn);
+
+	mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
+
+	goto out;
+
+err_cqwq:
+	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+out:
+	return err;
+}
+
+static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
+{
+	tasklet_disable(&conn->cq.tasklet);
+	tasklet_kill(&conn->cq.tasklet);
+	mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
+	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	struct mlx5_wq_param wqp;
+
+	wqp.buf_numa_node = mdev->priv.numa_node;
+	wqp.db_numa_node  = mdev->priv.numa_node;
+
+	return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq,
+				 &conn->qp.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
+				    unsigned int tx_size, unsigned int rx_size)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+	void *in = NULL, *qpc;
+	int err, inlen;
+
+	conn->qp.rq.pc = 0;
+	conn->qp.rq.cc = 0;
+	conn->qp.rq.size = roundup_pow_of_two(rx_size);
+	conn->qp.sq.pc = 0;
+	conn->qp.sq.cc = 0;
+	conn->qp.sq.size = roundup_pow_of_two(tx_size);
+
+	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size));
+	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size));
+	err = mlx5_fpga_conn_create_wq(conn, temp_qpc);
+	if (err)
+		goto out;
+
+	conn->qp.rq.bufs = kvzalloc(sizeof(conn->qp.rq.bufs[0]) *
+				    conn->qp.rq.size, GFP_KERNEL);
+	if (!conn->qp.rq.bufs) {
+		err = -ENOMEM;
+		goto err_wq;
+	}
+
+	conn->qp.sq.bufs = kvzalloc(sizeof(conn->qp.sq.bufs[0]) *
+				    conn->qp.sq.size, GFP_KERNEL);
+	if (!conn->qp.sq.bufs) {
+		err = -ENOMEM;
+		goto err_rq_bufs;
+	}
+
+	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+		conn->qp.wq_ctrl.buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_sq_bufs;
+	}
+
+	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+	MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index);
+	MLX5_SET(qpc, qpc, log_page_size,
+		 conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(qpc, qpc, fre, 1);
+	MLX5_SET(qpc, qpc, rlky, 1);
+	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn);
+	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+	MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size));
+	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+	MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
+	MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+	MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+	MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+
+	mlx5_fill_page_array(&conn->qp.wq_ctrl.buf,
+			     (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+
+	err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+	if (err)
+		goto err_sq_bufs;
+
+	conn->qp.mqp.event = mlx5_fpga_conn_event;
+	mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+
+	goto out;
+
+err_sq_bufs:
+	kvfree(conn->qp.sq.bufs);
+err_rq_bufs:
+	kvfree(conn->qp.rq.bufs);
+err_wq:
+	mlx5_wq_destroy(&conn->qp.wq_ctrl);
+out:
+	kvfree(in);
+	return err;
+}
+
+static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn)
+{
+	int ix;
+
+	for (ix = 0; ix < conn->qp.rq.size; ix++) {
+		if (!conn->qp.rq.bufs[ix])
+			continue;
+		mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]);
+		kfree(conn->qp.rq.bufs[ix]);
+		conn->qp.rq.bufs[ix] = NULL;
+	}
+}
+
+static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_dma_buf *buf, *temp;
+	int ix;
+
+	for (ix = 0; ix < conn->qp.sq.size; ix++) {
+		buf = conn->qp.sq.bufs[ix];
+		if (!buf)
+			continue;
+		conn->qp.sq.bufs[ix] = NULL;
+		mlx5_fpga_conn_unmap_buf(conn, buf);
+		if (!buf->complete)
+			continue;
+		buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+	}
+	list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) {
+		mlx5_fpga_conn_unmap_buf(conn, buf);
+		if (!buf->complete)
+			continue;
+		buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+	}
+}
+
+static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
+{
+	mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp);
+	mlx5_fpga_conn_free_recv_bufs(conn);
+	mlx5_fpga_conn_flush_send_bufs(conn);
+	kvfree(conn->qp.sq.bufs);
+	kvfree(conn->qp.rq.bufs);
+	mlx5_wq_destroy(&conn->qp.wq_ctrl);
+}
+
+static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_core_dev *mdev = conn->fdev->mdev;
+
+	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+
+	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
+				   &conn->qp.mqp);
+}
+
+static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 *qpc = NULL;
+	int err;
+
+	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+
+	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+	if (!qpc) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+	MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+	MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
+	MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+	MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+	MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
+				  &conn->qp.mqp);
+	if (err) {
+		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+		goto out;
+	}
+
+out:
+	kfree(qpc);
+	return err;
+}
+
+static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 *qpc = NULL;
+	int err;
+
+	mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
+
+	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+	if (!qpc) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
+	MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
+	MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn);
+	MLX5_SET(qpc, qpc, next_rcv_psn,
+		 MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
+	MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+	MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+	ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+			MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
+	MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+		 MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port));
+	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+		 conn->qp.sgid_index);
+	MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0);
+	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+	       MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
+	       MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
+
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+				  &conn->qp.mqp);
+	if (err) {
+		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+		goto out;
+	}
+
+out:
+	kfree(qpc);
+	return err;
+}
+
+static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 *qpc = NULL;
+	u32 opt_mask;
+	int err;
+
+	mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
+
+	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+	if (!qpc) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
+	MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
+	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */
+	MLX5_SET(qpc, qpc, next_send_psn,
+		 MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn));
+	MLX5_SET(qpc, qpc, retry_count, 7);
+	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+
+	opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
+				  &conn->qp.mqp);
+	if (err) {
+		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+		goto out;
+	}
+
+out:
+	kfree(qpc);
+	return err;
+}
+
+static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	int err;
+
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE);
+	err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+				  MLX5_FPGA_QPC_STATE, &conn->fpga_qpc);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err);
+		goto out;
+	}
+
+	err = mlx5_fpga_conn_reset_qp(conn);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to change QP state to reset\n");
+		goto err_fpga_qp;
+	}
+
+	err = mlx5_fpga_conn_init_qp(conn);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n");
+		goto err_fpga_qp;
+	}
+	conn->qp.active = true;
+
+	while (!mlx5_fpga_conn_post_recv_buf(conn))
+		;
+
+	err = mlx5_fpga_conn_rtr_qp(conn);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n");
+		goto err_recv_bufs;
+	}
+
+	err = mlx5_fpga_conn_rts_qp(conn);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n");
+		goto err_recv_bufs;
+	}
+	goto out;
+
+err_recv_bufs:
+	mlx5_fpga_conn_free_recv_bufs(conn);
+err_fpga_qp:
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+	if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+				MLX5_FPGA_QPC_STATE, &conn->fpga_qpc))
+		mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n");
+out:
+	return err;
+}
+
+struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+					     struct mlx5_fpga_conn_attr *attr,
+					     enum mlx5_ifc_fpga_qp_type qp_type)
+{
+	struct mlx5_fpga_conn *ret, *conn;
+	u8 *remote_mac, *remote_ip;
+	int err;
+
+	if (!attr->recv_cb)
+		return ERR_PTR(-EINVAL);
+
+	conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+	if (!conn)
+		return ERR_PTR(-ENOMEM);
+
+	conn->fdev = fdev;
+	INIT_LIST_HEAD(&conn->qp.sq.backlog);
+
+	spin_lock_init(&conn->qp.sq.lock);
+
+	conn->recv_cb = attr->recv_cb;
+	conn->cb_arg = attr->cb_arg;
+
+	remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
+	err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
+		ret = ERR_PTR(err);
+		goto err;
+	}
+
+	/* Build Modified EUI-64 IPv6 address from the MAC address */
+	remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip);
+	remote_ip[0] = 0xfe;
+	remote_ip[1] = 0x80;
+	addrconf_addr_eui48(&remote_ip[8], remote_mac);
+
+	err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err);
+		ret = ERR_PTR(err);
+		goto err;
+	}
+
+	err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
+				     MLX5_ROCE_VERSION_2,
+				     MLX5_ROCE_L3_TYPE_IPV6,
+				     remote_ip, remote_mac, true, 0);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
+		ret = ERR_PTR(err);
+		goto err_rsvd_gid;
+	}
+	mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index);
+
+	/* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe,
+	 * created during processing of the cqe
+	 */
+	err = mlx5_fpga_conn_create_cq(conn,
+				       (attr->tx_size + attr->rx_size) * 2);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err);
+		ret = ERR_PTR(err);
+		goto err_gid;
+	}
+
+	mlx5_fpga_conn_arm_cq(conn);
+
+	err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err);
+		ret = ERR_PTR(err);
+		goto err_cq;
+	}
+
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
+
+	err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc,
+				  &conn->fpga_qpn);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err);
+		ret = ERR_PTR(err);
+		goto err_qp;
+	}
+
+	err = mlx5_fpga_conn_connect(conn);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto err_conn;
+	}
+
+	mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn);
+	ret = conn;
+	goto out;
+
+err_conn:
+	mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+err_qp:
+	mlx5_fpga_conn_destroy_qp(conn);
+err_cq:
+	mlx5_fpga_conn_destroy_cq(conn);
+err_gid:
+	mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
+			       NULL, false, 0);
+err_rsvd_gid:
+	mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
+err:
+	kfree(conn);
+out:
+	return ret;
+}
+
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	int err = 0;
+
+	conn->qp.active = false;
+	tasklet_disable(&conn->cq.tasklet);
+	synchronize_irq(conn->cq.mcq.irqn);
+
+	mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
+				  &conn->qp.mqp);
+	if (err)
+		mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err);
+	mlx5_fpga_conn_destroy_qp(conn);
+	mlx5_fpga_conn_destroy_cq(conn);
+
+	mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
+			       NULL, NULL, false, 0);
+	mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
+	kfree(conn);
+}
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
+{
+	int err;
+
+	err = mlx5_nic_vport_enable_roce(fdev->mdev);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err);
+		goto out;
+	}
+
+	fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev);
+	if (IS_ERR(fdev->conn_res.uar)) {
+		err = PTR_ERR(fdev->conn_res.uar);
+		mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err);
+		goto err_roce;
+	}
+	mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n",
+		      fdev->conn_res.uar->index);
+
+	err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn);
+	if (err) {
+		mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err);
+		goto err_uar;
+	}
+	mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn);
+
+	err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn,
+					 &fdev->conn_res.mkey);
+	if (err) {
+		mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
+		goto err_dealloc_pd;
+	}
+	mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+
+	return 0;
+
+err_dealloc_pd:
+	mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+err_uar:
+	mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+err_roce:
+	mlx5_nic_vport_disable_roce(fdev->mdev);
+out:
+	return err;
+}
+
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
+{
+	mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey);
+	mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+	mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+	mlx5_nic_vport_disable_roce(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
new file mode 100644
index 000000000000..44bd9eccc711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_CONN_H__
+#define __MLX5_FPGA_CONN_H__
+
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+#include "fpga/core.h"
+#include "fpga/sdk.h"
+#include "wq.h"
+
+struct mlx5_fpga_conn {
+	struct mlx5_fpga_device *fdev;
+
+	void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+	void *cb_arg;
+
+	/* FPGA QP */
+	u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)];
+	u32 fpga_qpn;
+
+	/* CQ */
+	struct {
+		struct mlx5_cqwq wq;
+		struct mlx5_frag_wq_ctrl wq_ctrl;
+		struct mlx5_core_cq mcq;
+		struct tasklet_struct tasklet;
+	} cq;
+
+	/* QP */
+	struct {
+		bool active;
+		int sgid_index;
+		struct mlx5_wq_qp wq;
+		struct mlx5_wq_ctrl wq_ctrl;
+		struct mlx5_core_qp mqp;
+		struct {
+			spinlock_t lock; /* Protects all SQ state */
+			unsigned int pc;
+			unsigned int cc;
+			unsigned int size;
+			struct mlx5_fpga_dma_buf **bufs;
+			struct list_head backlog;
+		} sq;
+		struct {
+			unsigned int pc;
+			unsigned int cc;
+			unsigned int size;
+			struct mlx5_fpga_dma_buf **bufs;
+		} rq;
+	} qp;
+};
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev);
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev);
+struct mlx5_fpga_conn *
+mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+		      struct mlx5_fpga_conn_attr *attr,
+		      enum mlx5_ifc_fpga_qp_type qp_type);
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn);
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+			struct mlx5_fpga_dma_buf *buf);
+
+#endif /* __MLX5_FPGA_CONN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index d88b332e9669..31e5a2627eb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -35,7 +35,9 @@
 #include <linux/mlx5/driver.h>
 
 #include "mlx5_core.h"
+#include "lib/mlx5.h"
 #include "fpga/core.h"
+#include "fpga/conn.h"
 
 static const char *const mlx5_fpga_error_strings[] = {
 	"Null Syndrome",
@@ -100,10 +102,34 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 	return 0;
 }
 
+int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+	int err;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+
+	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+		return err;
+	}
+	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+		return err;
+	}
+	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+		return err;
+	}
+	return 0;
+}
+
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned long flags;
+	unsigned int max_num_qps;
 	int err;
 
 	if (!fdev)
@@ -123,6 +149,28 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 		       mlx5_fpga_image_name(fdev->last_oper_image),
 		       MLX5_CAP_FPGA(fdev->mdev, image_version));
 
+	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+	err = mlx5_core_reserve_gids(mdev, max_num_qps);
+	if (err)
+		goto out;
+
+	err = mlx5_fpga_conn_device_init(fdev);
+	if (err)
+		goto err_rsvd_gid;
+
+	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+		err = mlx5_fpga_device_brb(fdev);
+		if (err)
+			goto err_conn_init;
+	}
+
+	goto out;
+
+err_conn_init:
+	mlx5_fpga_conn_device_cleanup(fdev);
+
+err_rsvd_gid:
+	mlx5_core_unreserve_gids(mdev, max_num_qps);
 out:
 	spin_lock_irqsave(&fdev->state_lock, flags);
 	fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
@@ -130,7 +178,7 @@ out:
 	return err;
 }
 
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_device *fdev = NULL;
 
@@ -151,9 +199,42 @@ int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned int max_num_qps;
+	unsigned long flags;
+	int err;
+
+	if (!fdev)
+		return;
+
+	spin_lock_irqsave(&fdev->state_lock, flags);
+	if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+		spin_unlock_irqrestore(&fdev->state_lock, flags);
+		return;
+	}
+	fdev->state = MLX5_FPGA_STATUS_NONE;
+	spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+		if (err)
+			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+				      err);
+	}
+
+	mlx5_fpga_conn_device_cleanup(fdev);
+	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+	mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
 {
-	kfree(mdev->fpga);
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	mlx5_fpga_device_stop(mdev);
+	kfree(fdev);
 	mdev->fpga = NULL;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index c55044d66778..82405ed84725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -44,6 +44,15 @@ struct mlx5_fpga_device {
 	enum mlx5_fpga_status state;
 	enum mlx5_fpga_image last_admin_image;
 	enum mlx5_fpga_image last_oper_image;
+
+	/* QP Connection resources */
+	struct {
+		u32 pdn;
+		struct mlx5_core_mkey mkey;
+		struct mlx5_uars_page *uar;
+	} conn_res;
+
+	struct mlx5_fpga_ipsec *ipsec;
 };
 
 #define mlx5_fpga_dbg(__adev, format, ...) \
@@ -68,19 +77,20 @@ struct mlx5_fpga_device {
 #define mlx5_fpga_info(__adev, format, ...) \
 	dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
 
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
 void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
 
 #else
 
-static inline int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 {
 	return 0;
 }
 
-static inline void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
 {
 }
 
@@ -89,6 +99,10 @@ static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
 static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
 				   void *data)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
new file mode 100644
index 000000000000..42970e2a05ff
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+
+#define SBU_QP_QUEUE_SIZE 8
+
+enum mlx5_ipsec_response_syndrome {
+	MLX5_IPSEC_RESPONSE_SUCCESS = 0,
+	MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+	MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
+	MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+enum mlx5_fpga_ipsec_sacmd_status {
+	MLX5_FPGA_IPSEC_SACMD_PENDING,
+	MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
+	MLX5_FPGA_IPSEC_SACMD_COMPLETE,
+};
+
+struct mlx5_ipsec_command_context {
+	struct mlx5_fpga_dma_buf buf;
+	struct mlx5_accel_ipsec_sa sa;
+	enum mlx5_fpga_ipsec_sacmd_status status;
+	int status_code;
+	struct completion complete;
+	struct mlx5_fpga_device *dev;
+	struct list_head list; /* Item in pending_cmds */
+};
+
+struct mlx5_ipsec_sadb_resp {
+	__be32 syndrome;
+	__be32 sw_sa_handle;
+	u8 reserved[24];
+} __packed;
+
+struct mlx5_fpga_ipsec {
+	struct list_head pending_cmds;
+	spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+	u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
+	struct mlx5_fpga_conn *conn;
+};
+
+static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+{
+	if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+		return false;
+
+	if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+	    MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+		return false;
+
+	if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+	    MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
+		return false;
+
+	return true;
+}
+
+static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
+					  struct mlx5_fpga_device *fdev,
+					  struct mlx5_fpga_dma_buf *buf,
+					  u8 status)
+{
+	struct mlx5_ipsec_command_context *context;
+
+	if (status) {
+		context = container_of(buf, struct mlx5_ipsec_command_context,
+				       buf);
+		mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
+			       status);
+		context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+		complete(&context->complete);
+	}
+}
+
+static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+{
+	switch (syndrome) {
+	case MLX5_IPSEC_RESPONSE_SUCCESS:
+		return 0;
+	case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+		return -EEXIST;
+	case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+		return -EINVAL;
+	case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+		return -EIO;
+	}
+	return -EIO;
+}
+
+static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
+{
+	struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
+	struct mlx5_ipsec_command_context *context;
+	enum mlx5_ipsec_response_syndrome syndrome;
+	struct mlx5_fpga_device *fdev = cb_arg;
+	unsigned long flags;
+
+	if (buf->sg[0].size < sizeof(*resp)) {
+		mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
+			       buf->sg[0].size, sizeof(*resp));
+		return;
+	}
+
+	mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
+		      ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+
+	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+	context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
+					   struct mlx5_ipsec_command_context,
+					   list);
+	if (context)
+		list_del(&context->list);
+	spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+	if (!context) {
+		mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
+		return;
+	}
+	mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
+
+	if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
+		mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+			      ntohl(context->sa.sw_sa_handle),
+			      ntohl(resp->sw_sa_handle));
+		return;
+	}
+
+	syndrome = ntohl(resp->syndrome);
+	context->status_code = syndrome_to_errno(syndrome);
+	context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+
+	if (context->status_code)
+		mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+			       syndrome);
+	complete(&context->complete);
+}
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				  struct mlx5_accel_ipsec_sa *cmd)
+{
+	struct mlx5_ipsec_command_context *context;
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned long flags;
+	int res = 0;
+
+	BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
+	if (!fdev || !fdev->ipsec)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&context->sa, cmd, sizeof(*cmd));
+	context->buf.complete = mlx5_fpga_ipsec_send_complete;
+	context->buf.sg[0].size = sizeof(context->sa);
+	context->buf.sg[0].data = &context->sa;
+	init_completion(&context->complete);
+	context->dev = fdev;
+	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+	list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+	spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+	context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
+
+	res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+	if (res) {
+		mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
+			       res);
+		spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+		list_del(&context->list);
+		spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+		kfree(context);
+		return ERR_PTR(res);
+	}
+	/* Context will be freed by wait func after completion */
+	return context;
+}
+
+int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+{
+	struct mlx5_ipsec_command_context *context = ctx;
+	int res;
+
+	res = wait_for_completion_killable(&context->complete);
+	if (res) {
+		mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
+		return -EINTR;
+	}
+
+	if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+		res = context->status_code;
+	else
+		res = -EIO;
+
+	kfree(context);
+	return res;
+}
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	u32 ret = 0;
+
+	if (mlx5_fpga_is_ipsec_device(mdev))
+		ret |= MLX5_ACCEL_IPSEC_DEVICE;
+	else
+		return ret;
+
+	if (!fdev->ipsec)
+		return ret;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
+		ret |= MLX5_ACCEL_IPSEC_ESP;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
+		ret |= MLX5_ACCEL_IPSEC_IPV6;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
+		ret |= MLX5_ACCEL_IPSEC_LSO;
+
+	return ret;
+}
+
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	if (!fdev || !fdev->ipsec)
+		return 0;
+
+	return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			number_of_ipsec_counters);
+}
+
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				  unsigned int counters_count)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned int i;
+	u32 *data;
+	u32 count;
+	u64 addr;
+	int ret;
+
+	if (!fdev || !fdev->ipsec)
+		return 0;
+
+	addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			     ipsec_counters_addr_low) +
+	       ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			     ipsec_counters_addr_high) << 32);
+
+	count = mlx5_fpga_ipsec_counters_count(mdev);
+
+	data = kzalloc(sizeof(u32) * count * 2, GFP_KERNEL);
+	if (!data) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
+				 MLX5_FPGA_ACCESS_TYPE_DONTCARE);
+	if (ret < 0) {
+		mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
+			      ret);
+		goto out;
+	}
+	ret = 0;
+
+	if (count > counters_count)
+		count = counters_count;
+
+	/* Each counter is low word, then high. But each word is big-endian */
+	for (i = 0; i < count; i++)
+		counters[i] = (u64)ntohl(data[i * 2]) |
+			      ((u64)ntohl(data[i * 2 + 1]) << 32);
+
+out:
+	kfree(data);
+	return ret;
+}
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_conn_attr init_attr = {0};
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	struct mlx5_fpga_conn *conn;
+	int err;
+
+	if (!mlx5_fpga_is_ipsec_device(mdev))
+		return 0;
+
+	fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
+	if (!fdev->ipsec)
+		return -ENOMEM;
+
+	err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
+				     fdev->ipsec->caps);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
+			      err);
+		goto error;
+	}
+
+	INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
+	spin_lock_init(&fdev->ipsec->pending_cmds_lock);
+
+	init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+	init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+	init_attr.recv_cb = mlx5_fpga_ipsec_recv;
+	init_attr.cb_arg = fdev;
+	conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+	if (IS_ERR(conn)) {
+		err = PTR_ERR(conn);
+		mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
+			      err);
+		goto error;
+	}
+	fdev->ipsec->conn = conn;
+	return 0;
+
+error:
+	kfree(fdev->ipsec);
+	fdev->ipsec = NULL;
+	return err;
+}
+
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	if (!mlx5_fpga_is_ipsec_device(mdev))
+		return;
+
+	mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
+	kfree(fdev->ipsec);
+	fdev->ipsec = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
new file mode 100644
index 000000000000..26a3e4b56972
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_IPSEC_H__
+#define __MLX5_FPGA_IPSEC_H__
+
+#include "accel/ipsec.h"
+
+#ifdef CONFIG_MLX5_FPGA
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				  struct mlx5_accel_ipsec_sa *cmd);
+int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				  unsigned int counters_count);
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+						struct mlx5_accel_ipsec_sa *cmd)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline unsigned int
+mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
+						u64 *counters)
+{
+	return 0;
+}
+
+static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
+
+static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif /* CONFIG_MLX5_FPGA */
+
+#endif	/* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
new file mode 100644
index 000000000000..3c11d6e2160a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+			  struct mlx5_fpga_conn_attr *attr)
+{
+	return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+	mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+			       struct mlx5_fpga_dma_buf *buf)
+{
+	return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+				  u64 addr, u8 *buf)
+{
+	size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+	size_t bytes_done = 0;
+	u8 actual_size;
+	int err;
+
+	if (!fdev->mdev)
+		return -ENOTCONN;
+
+	while (bytes_done < size) {
+		actual_size = min(max_size, (size - bytes_done));
+
+		err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+					   addr + bytes_done,
+					   buf + bytes_done, false);
+		if (err) {
+			mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+				      err);
+			break;
+		}
+
+		bytes_done += actual_size;
+	}
+
+	return err;
+}
+
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+				   u64 addr, u8 *buf)
+{
+	size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+	size_t bytes_done = 0;
+	u8 actual_size;
+	int err;
+
+	if (!fdev->mdev)
+		return -ENOTCONN;
+
+	while (bytes_done < size) {
+		actual_size = min(max_size, (size - bytes_done));
+
+		err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+					   addr + bytes_done,
+					   buf + bytes_done, true);
+		if (err) {
+			mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n");
+			break;
+		}
+
+		bytes_done += actual_size;
+	}
+
+	return err;
+}
+
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+		       void *buf, enum mlx5_fpga_access_type access_type)
+{
+	int ret;
+
+	switch (access_type) {
+	case MLX5_FPGA_ACCESS_TYPE_I2C:
+		ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+		if (ret)
+			return ret;
+		break;
+	default:
+		mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+			       access_type);
+		return -EACCES;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+			void *buf, enum mlx5_fpga_access_type access_type)
+{
+	int ret;
+
+	switch (access_type) {
+	case MLX5_FPGA_ACCESS_TYPE_I2C:
+		ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+		if (ret)
+			return ret;
+		break;
+	default:
+		mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+			       access_type);
+		return -EACCES;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+	return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
new file mode 100644
index 000000000000..baa537e54a49
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MLX5_FPGA_SDK_H
+#define MLX5_FPGA_SDK_H
+
+#include <linux/types.h>
+#include <linux/dma-direction.h>
+
+/**
+ * DOC: Innova SDK
+ * This header defines the in-kernel API for Innova FPGA client drivers.
+ */
+
+enum mlx5_fpga_access_type {
+	MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+	MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
+struct mlx5_fpga_conn;
+struct mlx5_fpga_device;
+
+/**
+ * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry
+ */
+struct mlx5_fpga_dma_entry {
+	/** @data: Virtual address pointer to the data */
+	void *data;
+	/** @size: Size in bytes of the data */
+	unsigned int size;
+	/** @dma_addr: Private member. Physical DMA-mapped address of the data */
+	dma_addr_t dma_addr;
+};
+
+/**
+ * struct mlx5_fpga_dma_buf - A packet buffer
+ * May contain up to 2 scatter-gather data entries
+ */
+struct mlx5_fpga_dma_buf {
+	/** @dma_dir: DMA direction */
+	enum dma_data_direction dma_dir;
+	/** @sg: Scatter-gather entries pointing to the data in memory */
+	struct mlx5_fpga_dma_entry sg[2];
+	/** @list: Item in SQ backlog, for TX packets */
+	struct list_head list;
+	/**
+	 * @complete: Completion routine, for TX packets
+	 * @conn: FPGA Connection this packet was sent to
+	 * @fdev: FPGA device this packet was sent to
+	 * @buf: The packet buffer
+	 * @status: 0 if successful, or an error code otherwise
+	 */
+	void (*complete)(struct mlx5_fpga_conn *conn,
+			 struct mlx5_fpga_device *fdev,
+			 struct mlx5_fpga_dma_buf *buf, u8 status);
+};
+
+/**
+ * struct mlx5_fpga_conn_attr - FPGA connection attributes
+ * Describes the attributes of a connection
+ */
+struct mlx5_fpga_conn_attr {
+	/** @tx_size: Size of connection TX queue, in packets */
+	unsigned int tx_size;
+	/** @rx_size: Size of connection RX queue, in packets */
+	unsigned int rx_size;
+	/**
+	 * @recv_cb: Callback function which is called for received packets
+	 * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg
+	 * @buf: A buffer containing a received packet
+	 *
+	 * buf is guaranteed to only contain a single scatter-gather entry.
+	 * The size of the actual packet received is specified in buf.sg[0].size
+	 * When this callback returns, the packet buffer may be re-used for
+	 * subsequent receives.
+	 */
+	void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+	void *cb_arg;
+};
+
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+			  struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @fdev: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+			       struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+		       void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+			void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ *         -EINVAL if the buffer is not large enough to contain SBU caps
+ *         or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
+#endif /* MLX5_FPGA_SDK_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 0648a659b21d..4b6b03d6297f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -67,6 +67,7 @@ enum {
 
 enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
+	MLX5_DROP_NEW_RECOVERY_WORK,
 };
 
 static u8 get_nic_state(struct mlx5_core_dev *dev)
@@ -194,7 +195,7 @@ static void health_care(struct work_struct *work)
 	mlx5_handle_bad_state(dev);
 
 	spin_lock_irqsave(&health->wq_lock, flags);
-	if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+	if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
 		schedule_delayed_work(&health->recover_work, recover_delay);
 	else
 		dev_err(&dev->pdev->dev,
@@ -322,6 +323,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 	init_timer(&health->timer);
 	health->sick = 0;
 	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+	clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
@@ -345,11 +347,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 
 	spin_lock_irqsave(&health->wq_lock, flags);
 	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	spin_unlock_irqrestore(&health->wq_lock, flags);
 	cancel_delayed_work_sync(&health->recover_work);
 	cancel_work_sync(&health->work);
 }
 
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	spin_lock(&health->wq_lock);
+	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+	spin_unlock(&health->wq_lock);
+	cancel_delayed_work_sync(&dev->priv.health.recover_work);
+}
+
 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 000000000000..de2aed44ab85
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+	unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+	ida_init(&dev->roce.reserved_gids.ida);
+	dev->roce.reserved_gids.start = tblsz;
+	dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+	WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+	dev->roce.reserved_gids.start = 0;
+	dev->roce.reserved_gids.count = 0;
+	ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
+		return -EPERM;
+	}
+	if (dev->roce.reserved_gids.start < count) {
+		mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
+			       count);
+		return -ENOMEM;
+	}
+	if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+		mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count);
+		return -ENOMEM;
+	}
+
+	dev->roce.reserved_gids.start -= count;
+	dev->roce.reserved_gids.count += count;
+	mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+	return 0;
+}
+
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
+	WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+	     count, dev->roce.reserved_gids.count);
+
+	dev->roce.reserved_gids.start += count;
+	dev->roce.reserved_gids.count -= count;
+	mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+	int end = dev->roce.reserved_gids.start +
+		  dev->roce.reserved_gids.count;
+	int index = 0;
+
+	index = ida_simple_get(&dev->roce.reserved_gids.ida,
+			       dev->roce.reserved_gids.start, end,
+			       GFP_KERNEL);
+	if (index < 0)
+		return index;
+
+	mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index);
+	*gid_index = index;
+	return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+	mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+	ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+	return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
+			   const u8 *mac, bool vlan, u16 vlan_id)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+	char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+					  source_l3_address);
+	void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+				      source_mac_47_32);
+	int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return -EINVAL;
+
+	if (gid) {
+		if (vlan) {
+			MLX5_SET_RA(in_addr, vlan_valid, 1);
+			MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+		}
+
+		ether_addr_copy(addr_mac, mac);
+		MLX5_SET_RA(in_addr, roce_version, roce_version);
+		MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+		memcpy(addr_l3_addr, gid, gidsz);
+	}
+
+	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 000000000000..7550b1cc8c6a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c7f75e12c13b..c065132b956d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -56,7 +56,9 @@
 #ifdef CONFIG_MLX5_CORE_EN
 #include "eswitch.h"
 #endif
+#include "lib/mlx5.h"
 #include "fpga/core.h"
+#include "accel/ipsec.h"
 
 MODULE_AUTHOR("Eli Cohen <[email protected]>");
 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -936,6 +938,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
 	mlx5_init_mkey_table(dev);
 
+	mlx5_init_reserved_gids(dev);
+
 	err = mlx5_init_rl_table(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -956,8 +960,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 		goto err_eswitch_cleanup;
 	}
 
+	err = mlx5_fpga_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+		goto err_sriov_cleanup;
+	}
+
 	return 0;
 
+err_sriov_cleanup:
+	mlx5_sriov_cleanup(dev);
 err_eswitch_cleanup:
 #ifdef CONFIG_MLX5_CORE_EN
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -981,11 +993,13 @@ out:
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+	mlx5_fpga_cleanup(dev);
 	mlx5_sriov_cleanup(dev);
 #ifdef CONFIG_MLX5_CORE_EN
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
 #endif
 	mlx5_cleanup_rl_table(dev);
+	mlx5_cleanup_reserved_gids(dev);
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
@@ -1020,7 +1034,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (err) {
 		dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
 			FW_PRE_INIT_TIMEOUT_MILI);
-		goto out;
+		goto out_err;
 	}
 
 	err = mlx5_cmd_init(dev);
@@ -1117,16 +1131,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_disable_msix;
 	}
 
-	err = mlx5_fpga_device_init(dev);
-	if (err) {
-		dev_err(&pdev->dev, "fpga device init failed %d\n", err);
-		goto err_put_uars;
-	}
-
 	err = mlx5_start_eqs(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-		goto err_fpga_init;
+		goto err_put_uars;
 	}
 
 	err = alloc_comp_eqs(dev);
@@ -1160,7 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	err = mlx5_fpga_device_start(dev);
 	if (err) {
 		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
-		goto err_reg_dev;
+		goto err_fpga_start;
+	}
+	err = mlx5_accel_ipsec_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+		goto err_ipsec_start;
 	}
 
 	if (mlx5_device_registered(dev)) {
@@ -1181,6 +1194,11 @@ out:
 	return 0;
 
 err_reg_dev:
+	mlx5_accel_ipsec_cleanup(dev);
+err_ipsec_start:
+	mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
 	mlx5_sriov_detach(dev);
 
 err_sriov:
@@ -1198,9 +1216,6 @@ err_affinity_hints:
 err_stop_eqs:
 	mlx5_stop_eqs(dev);
 
-err_fpga_init:
-	mlx5_fpga_device_cleanup(dev);
-
 err_put_uars:
 	mlx5_put_uars_page(dev, priv->uar);
 
@@ -1243,7 +1258,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	int err = 0;
 
 	if (cleanup)
-		mlx5_drain_health_wq(dev);
+		mlx5_drain_health_recovery(dev);
 
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
@@ -1254,9 +1269,15 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto out;
 	}
 
+	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+	set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
+
 	if (mlx5_device_registered(dev))
 		mlx5_detach_device(dev);
 
+	mlx5_accel_ipsec_cleanup(dev);
+	mlx5_fpga_device_stop(dev);
+
 	mlx5_sriov_detach(dev);
 #ifdef CONFIG_MLX5_CORE_EN
 	mlx5_eswitch_detach(dev->priv.eswitch);
@@ -1265,7 +1286,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_irq_clear_affinity_hints(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
-	mlx5_fpga_device_cleanup(dev);
 	mlx5_put_uars_page(dev, priv->uar);
 	mlx5_disable_msix(dev);
 	if (cleanup)
@@ -1282,8 +1302,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_cmd_cleanup(dev);
 
 out:
-	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-	set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
 	mutex_unlock(&dev->intf_state_mutex);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 06019d00ab7b..5abfec1c3399 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -926,12 +926,16 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
 
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
 {
+	if (atomic_inc_return(&mdev->roce.roce_en) != 1)
+		return 0;
 	return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
 
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
 {
+	if (atomic_dec_return(&mdev->roce.roce_en) != 0)
+		return 0;
 	return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 921673c42bc9..6bcfc25350f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -54,6 +54,12 @@ static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
 	return mlx5_wq_cyc_get_size(wq) << wq->log_stride;
 }
 
+static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
+{
+	return mlx5_wq_cyc_get_byte_size(&wq->rq) +
+	       mlx5_wq_cyc_get_byte_size(&wq->sq);
+}
+
 static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
 {
 	return mlx5_cqwq_get_size(wq) << wq->log_stride;
@@ -99,6 +105,46 @@ err_db_free:
 	return err;
 }
 
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+		      void *qpc, struct mlx5_wq_qp *wq,
+		      struct mlx5_wq_ctrl *wq_ctrl)
+{
+	int err;
+
+	wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+	wq->rq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1;
+
+	wq->sq.log_stride = ilog2(MLX5_SEND_WQE_BB);
+	wq->sq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1;
+
+	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+	if (err) {
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+		return err;
+	}
+
+	err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+				  &wq_ctrl->buf, param->buf_numa_node);
+	if (err) {
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+		goto err_db_free;
+	}
+
+	wq->rq.buf = wq_ctrl->buf.direct.buf;
+	wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
+	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
+	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
+
+	wq_ctrl->mdev = mdev;
+
+	return 0;
+
+err_db_free:
+	mlx5_db_free(mdev, &wq_ctrl->db);
+
+	return err;
+}
+
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_frag_wq_ctrl *wq_ctrl)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index d8afed898c31..718589d0cec2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -34,6 +34,8 @@
 #define __MLX5_WQ_H__
 
 #include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
 
 struct mlx5_wq_param {
 	int		linear;
@@ -60,6 +62,11 @@ struct mlx5_wq_cyc {
 	u8			log_stride;
 };
 
+struct mlx5_wq_qp {
+	struct mlx5_wq_cyc	rq;
+	struct mlx5_wq_cyc	sq;
+};
+
 struct mlx5_cqwq {
 	struct mlx5_frag_buf	frag_buf;
 	__be32			*db;
@@ -87,6 +94,10 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
 
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+		      void *qpc, struct mlx5_wq_qp *wq,
+		      struct mlx5_wq_ctrl *wq_ctrl);
+
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_frag_wq_ctrl *wq_ctrl);
@@ -146,6 +157,22 @@ static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq)
 	*wq->db = cpu_to_be32(wq->cc & 0xffffff);
 }
 
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+	u32 ci = mlx5_cqwq_get_ci(wq);
+	struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+	u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
+	u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
+
+	if (cqe_ownership_bit != sw_ownership_val)
+		return NULL;
+
+	/* ensure cqe content is read after cqe ownership bit */
+	dma_rmb();
+
+	return cqe;
+}
+
 static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
 {
 	return wq->cur_sz == wq->sz_m1;
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
index 9ca85383aa35..7a712b6b09ec 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
@@ -96,7 +96,7 @@ struct mlxfw_dev {
 	u16 psid_size;
 };
 
-#if IS_ENABLED(CONFIG_MLXFW)
+#if IS_REACHABLE(CONFIG_MLXFW)
 int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
 			 const struct firmware *firmware);
 #else
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 700cc8c6aa5b..192cb93e7669 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3301,6 +3301,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
 	u16 vid = vlan_dev_vlan_id(vlan_dev);
 
+	if (netif_is_bridge_port(vlan_dev))
+		return 0;
+
 	if (mlxsw_sp_port_dev_check(real_dev))
 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
 							 event, vid);
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index 0d5a7b9203a4..0e331e2f685a 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -25,6 +25,16 @@ config NFP
 	  cards working as a advanced Ethernet NIC.  It works with both
 	  SR-IOV physical and virtual functions.
 
+config NFP_APP_FLOWER
+	bool "NFP4000/NFP6000 TC Flower offload support"
+	depends on NFP
+	depends on NET_SWITCHDEV
+	---help---
+	  Enable driver support for TC Flower offload on NFP4000 and NFP6000.
+	  Say Y, if you are planning to make use of TC Flower offload
+	  either directly, with Open vSwitch, or any other way.  Note that
+	  TC Flower offload requires specific FW to work.
+
 config NFP_DEBUG
 	bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers"
 	depends on NFP
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 10b556b2c59d..b8e1358868bd 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -27,9 +27,17 @@ nfp-objs := \
 	    nfp_port.o \
 	    bpf/main.o \
 	    bpf/offload.o \
+	    nic/main.o
+
+ifeq ($(CONFIG_NFP_APP_FLOWER),y)
+nfp-objs += \
+	    flower/action.o \
 	    flower/cmsg.o \
 	    flower/main.o \
-	    nic/main.o
+	    flower/match.o \
+	    flower/metadata.o \
+	    flower/offload.o
+endif
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
 nfp-objs += \
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
new file mode 100644
index 000000000000..db9750695dc7
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_net_repr.h"
+
+static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
+{
+	size_t act_size = sizeof(struct nfp_fl_pop_vlan);
+	u16 tmp_pop_vlan_op;
+
+	tmp_pop_vlan_op =
+		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN);
+
+	pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op);
+	pop_vlan->reserved = 0;
+}
+
+static void
+nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
+		 const struct tc_action *action)
+{
+	size_t act_size = sizeof(struct nfp_fl_push_vlan);
+	struct tcf_vlan *vlan = to_vlan(action);
+	u16 tmp_push_vlan_tci;
+	u16 tmp_push_vlan_op;
+
+	tmp_push_vlan_op =
+		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN);
+
+	push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op);
+	/* Set action push vlan parameters. */
+	push_vlan->reserved = 0;
+	push_vlan->vlan_tpid = tcf_vlan_push_proto(action);
+
+	tmp_push_vlan_tci =
+		FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) |
+		FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) |
+		NFP_FL_PUSH_VLAN_CFI;
+	push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
+}
+
+static int
+nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
+	      struct nfp_fl_payload *nfp_flow, bool last,
+	      struct net_device *in_dev)
+{
+	size_t act_size = sizeof(struct nfp_fl_output);
+	struct net_device *out_dev;
+	u16 tmp_output_op;
+	int ifindex;
+
+	/* Set action opcode to output action. */
+	tmp_output_op =
+		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT);
+
+	output->a_op = cpu_to_be16(tmp_output_op);
+
+	/* Set action output parameters. */
+	output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0);
+
+	ifindex = tcf_mirred_ifindex(action);
+	out_dev = __dev_get_by_index(dev_net(in_dev), ifindex);
+	if (!out_dev)
+		return -EOPNOTSUPP;
+
+	/* Only offload egress ports are on the same device as the ingress
+	 * port.
+	 */
+	if (!switchdev_port_same_parent_id(in_dev, out_dev))
+		return -EOPNOTSUPP;
+
+	output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
+	if (!output->port)
+		return -EOPNOTSUPP;
+
+	nfp_flow->meta.shortcut = output->port;
+
+	return 0;
+}
+
+static int
+nfp_flower_loop_action(const struct tc_action *a,
+		       struct nfp_fl_payload *nfp_fl, int *a_len,
+		       struct net_device *netdev)
+{
+	struct nfp_fl_push_vlan *psh_v;
+	struct nfp_fl_pop_vlan *pop_v;
+	struct nfp_fl_output *output;
+	int err;
+
+	if (is_tcf_gact_shot(a)) {
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP);
+	} else if (is_tcf_mirred_egress_redirect(a)) {
+		if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+			return -EOPNOTSUPP;
+
+		output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
+		err = nfp_fl_output(output, a, nfp_fl, true, netdev);
+		if (err)
+			return err;
+
+		*a_len += sizeof(struct nfp_fl_output);
+	} else if (is_tcf_mirred_egress_mirror(a)) {
+		if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+			return -EOPNOTSUPP;
+
+		output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
+		err = nfp_fl_output(output, a, nfp_fl, false, netdev);
+		if (err)
+			return err;
+
+		*a_len += sizeof(struct nfp_fl_output);
+	} else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+		if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ)
+			return -EOPNOTSUPP;
+
+		pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len];
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV);
+
+		nfp_fl_pop_vlan(pop_v);
+		*a_len += sizeof(struct nfp_fl_pop_vlan);
+	} else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+		if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ)
+			return -EOPNOTSUPP;
+
+		psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len];
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+		nfp_fl_push_vlan(psh_v, a);
+		*a_len += sizeof(struct nfp_fl_push_vlan);
+	} else {
+		/* Currently we do not handle any other actions. */
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+			      struct net_device *netdev,
+			      struct nfp_fl_payload *nfp_flow)
+{
+	int act_len, act_cnt, err;
+	const struct tc_action *a;
+	LIST_HEAD(actions);
+
+	memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
+	nfp_flow->meta.act_len = 0;
+	act_len = 0;
+	act_cnt = 0;
+
+	tcf_exts_to_list(flow->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev);
+		if (err)
+			return err;
+		act_cnt++;
+	}
+
+	/* We optimise when the action list is small, this can unfortunately
+	 * not happen once we have more than one action in the action list.
+	 */
+	if (act_cnt > 1)
+		nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+	nfp_flow->meta.act_len = act_len;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 7761be436726..dd7fa9cf225f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -36,6 +36,7 @@
 #include <linux/skbuff.h>
 #include <net/dst_metadata.h>
 
+#include "main.h"
 #include "../nfpcore/nfp_cpp.h"
 #include "../nfp_net_repr.h"
 #include "./cmsg.h"
@@ -52,12 +53,7 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
 	return (struct nfp_flower_cmsg_hdr *)skb->data;
 }
 
-static void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
-{
-	return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
-}
-
-static struct sk_buff *
+struct sk_buff *
 nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
 		      enum nfp_flower_cmsg_type_port type)
 {
@@ -79,9 +75,8 @@ nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
 	return skb;
 }
 
-int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok)
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
 {
-	struct nfp_repr *repr = netdev_priv(netdev);
 	struct nfp_flower_cmsg_portmod *msg;
 	struct sk_buff *skb;
 
@@ -94,7 +89,7 @@ int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok)
 	msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id);
 	msg->reserved = 0;
 	msg->info = carrier_ok;
-	msg->mtu = cpu_to_be16(netdev->mtu);
+	msg->mtu = cpu_to_be16(repr->netdev->mtu);
 
 	nfp_ctrl_tx(repr->app->ctrl, skb);
 
@@ -149,6 +144,9 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
 	case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
 		nfp_flower_cmsg_portmod_rx(app, skb);
 		break;
+	case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
+		nfp_flower_rx_flow_stats(app, skb);
+		break;
 	default:
 		nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
 				     type);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 2eeddada7f4d..cf738de170ab 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -40,6 +40,196 @@
 
 #include "../nfp_app.h"
 
+#define NFP_FLOWER_LAYER_META		BIT(0)
+#define NFP_FLOWER_LAYER_PORT		BIT(1)
+#define NFP_FLOWER_LAYER_MAC		BIT(2)
+#define NFP_FLOWER_LAYER_TP		BIT(3)
+#define NFP_FLOWER_LAYER_IPV4		BIT(4)
+#define NFP_FLOWER_LAYER_IPV6		BIT(5)
+#define NFP_FLOWER_LAYER_CT		BIT(6)
+#define NFP_FLOWER_LAYER_VXLAN		BIT(7)
+
+#define NFP_FLOWER_LAYER_ETHER		BIT(3)
+#define NFP_FLOWER_LAYER_ARP		BIT(4)
+
+#define NFP_FLOWER_MASK_VLAN_PRIO	GENMASK(15, 13)
+#define NFP_FLOWER_MASK_VLAN_CFI	BIT(12)
+#define NFP_FLOWER_MASK_VLAN_VID	GENMASK(11, 0)
+
+#define NFP_FL_SC_ACT_DROP		0x80000000
+#define NFP_FL_SC_ACT_USER		0x7D000000
+#define NFP_FL_SC_ACT_POPV		0x6A000000
+#define NFP_FL_SC_ACT_NULL		0x00000000
+
+/* The maximum action list size (in bytes) supported by the NFP.
+ */
+#define NFP_FL_MAX_A_SIZ		1216
+#define NFP_FL_LW_SIZ			2
+
+/* Action opcodes */
+#define NFP_FL_ACTION_OPCODE_OUTPUT	0
+#define NFP_FL_ACTION_OPCODE_PUSH_VLAN	1
+#define NFP_FL_ACTION_OPCODE_POP_VLAN	2
+#define NFP_FL_ACTION_OPCODE_NUM	32
+
+#define NFP_FL_ACT_JMP_ID		GENMASK(15, 8)
+#define NFP_FL_ACT_LEN_LW		GENMASK(7, 0)
+
+#define NFP_FL_OUT_FLAGS_LAST		BIT(15)
+#define NFP_FL_OUT_FLAGS_USE_TUN	BIT(4)
+#define NFP_FL_OUT_FLAGS_TYPE_IDX	GENMASK(2, 0)
+
+#define NFP_FL_PUSH_VLAN_PRIO		GENMASK(15, 13)
+#define NFP_FL_PUSH_VLAN_CFI		BIT(12)
+#define NFP_FL_PUSH_VLAN_VID		GENMASK(11, 0)
+
+struct nfp_fl_output {
+	__be16 a_op;
+	__be16 flags;
+	__be32 port;
+};
+
+struct nfp_fl_push_vlan {
+	__be16 a_op;
+	__be16 reserved;
+	__be16 vlan_tpid;
+	__be16 vlan_tci;
+};
+
+struct nfp_fl_pop_vlan {
+	__be16 a_op;
+	__be16 reserved;
+};
+
+/* Metadata without L2 (1W/4B)
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  key_layers   |    mask_id    |           reserved            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_meta_one {
+	u8 nfp_flow_key_layer;
+	u8 mask_id;
+	u16 reserved;
+};
+
+/* Metadata with L2 (1W/4B)
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    key_type   |    mask_id    | PCP |p|   vlan outermost VID  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *                                 ^                               ^
+ *                           NOTE: |             TCI               |
+ *                                 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_meta_two {
+	u8 nfp_flow_key_layer;
+	u8 mask_id;
+	__be16 tci;
+};
+
+/* Port details (1W/4B)
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         port_ingress                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_in_port {
+	__be32 in_port;
+};
+
+/* L2 details (4W/16B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     mac_addr_dst, 31 - 0                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      mac_addr_dst, 47 - 32    |     mac_addr_src, 15 - 0      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     mac_addr_src, 47 - 16                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |       mpls outermost label            |  TC |B|   reserved  |q|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_mac_mpls {
+	u8 mac_dst[6];
+	u8 mac_src[6];
+	__be32 mpls_lse;
+};
+
+/* L4 ports (for UDP, TCP, SCTP) (1W/4B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |            port_src           |           port_dst            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_tp_ports {
+	__be16 port_src;
+	__be16 port_dst;
+};
+
+/* L3 IPv4 details (3W/12B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    DSCP   |ECN|   protocol    |           reserved            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        ipv4_addr_src                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        ipv4_addr_dst                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_ipv4 {
+	u8 tos;
+	u8 proto;
+	u8 ttl;
+	u8 reserved;
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+};
+
+/* L3 IPv6 details (10W/40B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    DSCP   |ECN|   protocol    |          reserved             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   ipv6_exthdr   | res |            ipv6_flow_label            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src,   31 - 0                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src,  63 - 32                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src,  95 - 64                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src, 127 - 96                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst,   31 - 0                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst,  63 - 32                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst,  95 - 64                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst, 127 - 96                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_ipv6 {
+	u8 tos;
+	u8 proto;
+	u8 ttl;
+	u8 reserved;
+	__be32 ipv6_flow_label_exthdr;
+	struct in6_addr ipv6_src;
+	struct in6_addr ipv6_dst;
+};
+
 /* The base header for a control message packet.
  * Defines an 8-bit version, and an 8-bit type, padded
  * to a 32-bit word. Rest of the packet is type-specific.
@@ -55,7 +245,10 @@ struct nfp_flower_cmsg_hdr {
 
 /* Types defined for port related control messages  */
 enum nfp_flower_cmsg_type_port {
+	NFP_FLOWER_CMSG_TYPE_FLOW_ADD =		0,
+	NFP_FLOWER_CMSG_TYPE_FLOW_DEL =		2,
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
+	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
 	NFP_FLOWER_CMSG_TYPE_PORT_ECHO =	16,
 	NFP_FLOWER_CMSG_TYPE_MAX =		32,
 };
@@ -110,7 +303,15 @@ nfp_flower_cmsg_pcie_port(u8 nfp_pcie, enum nfp_flower_cmsg_port_vnic_type type,
 			   NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT);
 }
 
-int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok);
+static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
+{
+	return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
+}
+
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok);
 void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb);
+struct sk_buff *
+nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
+		      enum nfp_flower_cmsg_type_port type);
 
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 8e5ca6b4bb33..5fe6d3582597 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -34,10 +34,13 @@
 #include <linux/etherdevice.h>
 #include <linux/pci.h>
 #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 
+#include "main.h"
 #include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nffw.h"
 #include "../nfpcore/nfp_nsp.h"
 #include "../nfp_app.h"
 #include "../nfp_main.h"
@@ -46,13 +49,7 @@
 #include "../nfp_port.h"
 #include "./cmsg.h"
 
-/**
- * struct nfp_flower_priv - Flower APP per-vNIC priv data
- * @nn:		     Pointer to vNIC
- */
-struct nfp_flower_priv {
-	struct nfp_net *nn;
-};
+#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
 
 static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 {
@@ -104,51 +101,30 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
 	return reprs->reprs[port];
 }
 
-static void
-nfp_flower_repr_netdev_get_stats64(struct net_device *netdev,
-				   struct rtnl_link_stats64 *stats)
-{
-	struct nfp_repr *repr = netdev_priv(netdev);
-	enum nfp_repr_type type;
-	u32 port_id;
-	u8 port = 0;
-
-	port_id = repr->dst->u.port_info.port_id;
-	type = nfp_flower_repr_get_type_and_port(repr->app, port_id, &port);
-	nfp_repr_get_stats64(repr->app, type, port, stats);
-}
-
-static int nfp_flower_repr_netdev_open(struct net_device *netdev)
+static int
+nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr)
 {
 	int err;
 
-	err = nfp_flower_cmsg_portmod(netdev, true);
+	err = nfp_flower_cmsg_portmod(repr, true);
 	if (err)
 		return err;
 
-	netif_carrier_on(netdev);
-	netif_tx_wake_all_queues(netdev);
+	netif_carrier_on(repr->netdev);
+	netif_tx_wake_all_queues(repr->netdev);
 
 	return 0;
 }
 
-static int nfp_flower_repr_netdev_stop(struct net_device *netdev)
+static int
+nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr)
 {
-	netif_carrier_off(netdev);
-	netif_tx_disable(netdev);
+	netif_carrier_off(repr->netdev);
+	netif_tx_disable(repr->netdev);
 
-	return nfp_flower_cmsg_portmod(netdev, false);
+	return nfp_flower_cmsg_portmod(repr, false);
 }
 
-static const struct net_device_ops nfp_flower_repr_netdev_ops = {
-	.ndo_open		= nfp_flower_repr_netdev_open,
-	.ndo_stop		= nfp_flower_repr_netdev_stop,
-	.ndo_start_xmit		= nfp_repr_xmit,
-	.ndo_get_stats64	= nfp_flower_repr_netdev_get_stats64,
-	.ndo_has_offload_stats	= nfp_repr_has_offload_stats,
-	.ndo_get_offload_stats	= nfp_repr_get_offload_stats,
-};
-
 static void nfp_flower_sriov_disable(struct nfp_app *app)
 {
 	nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
@@ -162,14 +138,19 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
 	u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp);
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_reprs *reprs, *old_reprs;
+	enum nfp_port_type port_type;
 	const u8 queue = 0;
 	int i, err;
 
+	port_type = repr_type == NFP_REPR_TYPE_PF ? NFP_PORT_PF_PORT :
+						    NFP_PORT_VF_PORT;
+
 	reprs = nfp_reprs_alloc(cnt);
 	if (!reprs)
 		return -ENOMEM;
 
 	for (i = 0; i < cnt; i++) {
+		struct nfp_port *port;
 		u32 port_id;
 
 		reprs->reprs[i] = nfp_repr_alloc(app);
@@ -178,15 +159,24 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
 			goto err_reprs_clean;
 		}
 
+		port = nfp_port_alloc(app, port_type, reprs->reprs[i]);
+		if (repr_type == NFP_REPR_TYPE_PF) {
+			port->pf_id = i;
+		} else {
+			port->pf_id = 0; /* For now we only support 1 PF */
+			port->vf_id = i;
+		}
+
 		eth_hw_addr_random(reprs->reprs[i]);
 
 		port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type,
 						    i, queue);
 		err = nfp_repr_init(app, reprs->reprs[i],
-				    &nfp_flower_repr_netdev_ops,
-				    port_id, NULL, priv->nn->dp.netdev);
-		if (err)
+				    port_id, port, priv->nn->dp.netdev);
+		if (err) {
+			nfp_port_free(port);
 			goto err_reprs_clean;
+		}
 
 		nfp_info(app->cpp, "%s%d Representor(%s) created\n",
 			 repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i,
@@ -260,7 +250,6 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
 
 		cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port);
 		err = nfp_repr_init(app, reprs->reprs[phys_port],
-				    &nfp_flower_repr_netdev_ops,
 				    cmsg_port_id, port, priv->nn->dp.netdev);
 		if (err) {
 			nfp_port_free(port);
@@ -296,26 +285,16 @@ static int nfp_flower_start(struct nfp_app *app)
 					   NFP_REPR_TYPE_PF, 1);
 }
 
-static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
-{
-	kfree(app->priv);
-	app->priv = NULL;
-}
-
 static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn,
 				unsigned int id)
 {
-	struct nfp_flower_priv *priv;
+	struct nfp_flower_priv *priv = app->priv;
 
 	if (id > 0) {
 		nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n");
 		goto err_invalid_port;
 	}
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-	app->priv = priv;
 	priv->nn = nn;
 
 	eth_hw_addr_random(nn->dp.netdev);
@@ -331,6 +310,8 @@ err_invalid_port:
 static int nfp_flower_init(struct nfp_app *app)
 {
 	const struct nfp_pf *pf = app->pf;
+	u64 version;
+	int err;
 
 	if (!pf->eth_tbl) {
 		nfp_warn(app->cpp, "FlowerNIC requires eth table\n");
@@ -347,7 +328,37 @@ static int nfp_flower_init(struct nfp_app *app)
 		return -EINVAL;
 	}
 
+	version = nfp_rtsym_read_le(app->pf->rtbl, "hw_flower_version", &err);
+	if (err) {
+		nfp_warn(app->cpp, "FlowerNIC requires hw_flower_version memory symbol\n");
+		return err;
+	}
+
+	/* We need to ensure hardware has enough flower capabilities. */
+	if (version != NFP_FLOWER_ALLOWED_VER) {
+		nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n");
+		return -EINVAL;
+	}
+
+	app->priv = vzalloc(sizeof(struct nfp_flower_priv));
+	if (!app->priv)
+		return -ENOMEM;
+
+	err = nfp_flower_metadata_init(app);
+	if (err)
+		goto err_free_app_priv;
+
 	return 0;
+
+err_free_app_priv:
+	vfree(app->priv);
+	return err;
+}
+
+static void nfp_flower_clean(struct nfp_app *app)
+{
+	vfree(app->priv);
+	app->priv = NULL;
 }
 
 const struct nfp_app_type app_flower = {
@@ -358,9 +369,12 @@ const struct nfp_app_type app_flower = {
 	.extra_cap	= nfp_flower_extra_cap,
 
 	.init		= nfp_flower_init,
+	.clean		= nfp_flower_clean,
 
 	.vnic_init	= nfp_flower_vnic_init,
-	.vnic_clean	= nfp_flower_vnic_clean,
+
+	.repr_open	= nfp_flower_repr_netdev_open,
+	.repr_stop	= nfp_flower_repr_netdev_stop,
 
 	.start		= nfp_flower_start,
 	.stop		= nfp_flower_stop,
@@ -372,4 +386,6 @@ const struct nfp_app_type app_flower = {
 
 	.eswitch_mode_get  = eswitch_mode_get,
 	.repr_get	= nfp_flower_repr_get,
+
+	.setup_tc	= nfp_flower_setup_tc,
 };
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
new file mode 100644
index 000000000000..9e64c048e83f
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_FLOWER_H__
+#define __NFP_FLOWER_H__ 1
+
+#include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+struct tc_to_netdev;
+struct net_device;
+struct nfp_app;
+
+#define NFP_FL_STATS_ENTRY_RS		BIT(20)
+#define NFP_FL_STATS_ELEM_RS		4
+#define NFP_FL_REPEATED_HASH_MAX	BIT(17)
+#define NFP_FLOWER_HASH_BITS		19
+#define NFP_FLOWER_MASK_ENTRY_RS	256
+#define NFP_FLOWER_MASK_ELEMENT_RS	1
+#define NFP_FLOWER_MASK_HASH_BITS	10
+
+#define NFP_FL_META_FLAG_NEW_MASK	128
+#define NFP_FL_META_FLAG_LAST_MASK	1
+
+#define NFP_FL_MASK_REUSE_TIME_NS	40000
+#define NFP_FL_MASK_ID_LOCATION		1
+
+struct nfp_fl_mask_id {
+	struct circ_buf mask_id_free_list;
+	struct timespec64 *last_used;
+	u8 init_unallocated;
+};
+
+struct nfp_fl_stats_id {
+	struct circ_buf free_list;
+	u32 init_unalloc;
+	u8 repeated_em_count;
+};
+
+/**
+ * struct nfp_flower_priv - Flower APP per-vNIC priv data
+ * @nn:			Pointer to vNIC
+ * @mask_id_seed:	Seed used for mask hash table
+ * @flower_version:	HW version of flower
+ * @stats_ids:		List of free stats ids
+ * @mask_ids:		List of free mask ids
+ * @mask_table:		Hash table used to store masks
+ * @flow_table:		Hash table used to store flower rules
+ */
+struct nfp_flower_priv {
+	struct nfp_net *nn;
+	u32 mask_id_seed;
+	u64 flower_version;
+	struct nfp_fl_stats_id stats_ids;
+	struct nfp_fl_mask_id mask_ids;
+	DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
+	DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
+};
+
+struct nfp_fl_key_ls {
+	u32 key_layer_two;
+	u8 key_layer;
+	int key_size;
+};
+
+struct nfp_fl_rule_metadata {
+	u8 key_len;
+	u8 mask_len;
+	u8 act_len;
+	u8 flags;
+	__be32 host_ctx_id;
+	__be64 host_cookie __packed;
+	__be64 flow_version __packed;
+	__be32 shortcut;
+};
+
+struct nfp_fl_stats {
+	u64 pkts;
+	u64 bytes;
+	u64 used;
+};
+
+struct nfp_fl_payload {
+	struct nfp_fl_rule_metadata meta;
+	unsigned long tc_flower_cookie;
+	struct hlist_node link;
+	struct rcu_head rcu;
+	spinlock_t lock; /* lock stats */
+	struct nfp_fl_stats stats;
+	char *unmasked_data;
+	char *mask_data;
+	char *action_data;
+};
+
+struct nfp_fl_stats_frame {
+	__be32 stats_con_id;
+	__be32 pkt_count;
+	__be64 byte_count;
+	__be64 stats_cookie;
+};
+
+int nfp_flower_metadata_init(struct nfp_app *app);
+void nfp_flower_metadata_cleanup(struct nfp_app *app);
+
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+			u32 handle, __be16 proto, struct tc_to_netdev *tc);
+int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+				  struct nfp_fl_key_ls *key_ls,
+				  struct net_device *netdev,
+				  struct nfp_fl_payload *nfp_flow);
+int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+			      struct net_device *netdev,
+			      struct nfp_fl_payload *nfp_flow);
+int nfp_compile_flow_metadata(struct nfp_app *app,
+			      struct tc_cls_flower_offload *flow,
+			      struct nfp_fl_payload *nfp_flow);
+int nfp_modify_flow_metadata(struct nfp_app *app,
+			     struct nfp_fl_payload *nfp_flow);
+
+struct nfp_fl_payload *
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+struct nfp_fl_payload *
+nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+
+void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
new file mode 100644
index 000000000000..0e08404480ef
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+
+static void
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame,
+			    struct tc_cls_flower_offload *flow, u8 key_type,
+			    bool mask_version)
+{
+	struct flow_dissector_key_vlan *flow_vlan;
+	u16 tmp_tci;
+
+	/* Populate the metadata frame. */
+	frame->nfp_flow_key_layer = key_type;
+	frame->mask_id = ~0;
+
+	if (mask_version) {
+		frame->tci = cpu_to_be16(~0);
+		return;
+	}
+
+	flow_vlan = skb_flow_dissector_target(flow->dissector,
+					      FLOW_DISSECTOR_KEY_VLAN,
+					      flow->key);
+
+	/* Populate the tci field. */
+	if (!flow_vlan->vlan_id) {
+		tmp_tci = 0;
+	} else {
+		tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+				     flow_vlan->vlan_priority) |
+			  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+				     flow_vlan->vlan_id) |
+			  NFP_FLOWER_MASK_VLAN_CFI;
+	}
+	frame->tci = cpu_to_be16(tmp_tci);
+}
+
+static void
+nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
+{
+	frame->nfp_flow_key_layer = key_type;
+	frame->mask_id = 0;
+	frame->reserved = 0;
+}
+
+static int
+nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
+			bool mask_version)
+{
+	if (mask_version) {
+		frame->in_port = cpu_to_be32(~0);
+		return 0;
+	}
+
+	frame->in_port = cpu_to_be32(cmsg_port);
+
+	return 0;
+}
+
+static void
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
+		       struct tc_cls_flower_offload *flow,
+		       bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_eth_addrs *flow_mac;
+
+	flow_mac = skb_flow_dissector_target(flow->dissector,
+					     FLOW_DISSECTOR_KEY_ETH_ADDRS,
+					     target);
+
+	memset(frame, 0, sizeof(struct nfp_flower_mac_mpls));
+
+	/* Populate mac frame. */
+	ether_addr_copy(frame->mac_dst, &flow_mac->dst[0]);
+	ether_addr_copy(frame->mac_src, &flow_mac->src[0]);
+
+	if (mask_version)
+		frame->mpls_lse = cpu_to_be32(~0);
+}
+
+static void
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
+			 struct tc_cls_flower_offload *flow,
+			 bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ports *flow_tp;
+
+	flow_tp = skb_flow_dissector_target(flow->dissector,
+					    FLOW_DISSECTOR_KEY_PORTS,
+					    target);
+
+	frame->port_src = flow_tp->src;
+	frame->port_dst = flow_tp->dst;
+}
+
+static void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
+			struct tc_cls_flower_offload *flow,
+			bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv4_addrs *flow_ipv4;
+	struct flow_dissector_key_basic *flow_basic;
+
+	flow_ipv4 = skb_flow_dissector_target(flow->dissector,
+					      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+					      target);
+
+	flow_basic = skb_flow_dissector_target(flow->dissector,
+					       FLOW_DISSECTOR_KEY_BASIC,
+					       target);
+
+	/* Populate IPv4 frame. */
+	frame->reserved = 0;
+	frame->ipv4_src = flow_ipv4->src;
+	frame->ipv4_dst = flow_ipv4->dst;
+	frame->proto = flow_basic->ip_proto;
+	/* Wildcard TOS/TTL for now. */
+	frame->tos = 0;
+	frame->ttl = 0;
+}
+
+static void
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
+			struct tc_cls_flower_offload *flow,
+			bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv6_addrs *flow_ipv6;
+	struct flow_dissector_key_basic *flow_basic;
+
+	flow_ipv6 = skb_flow_dissector_target(flow->dissector,
+					      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+					      target);
+
+	flow_basic = skb_flow_dissector_target(flow->dissector,
+					       FLOW_DISSECTOR_KEY_BASIC,
+					       target);
+
+	/* Populate IPv6 frame. */
+	frame->reserved = 0;
+	frame->ipv6_src = flow_ipv6->src;
+	frame->ipv6_dst = flow_ipv6->dst;
+	frame->proto = flow_basic->ip_proto;
+	/* Wildcard LABEL/TOS/TTL for now. */
+	frame->ipv6_flow_label_exthdr = 0;
+	frame->tos = 0;
+	frame->ttl = 0;
+}
+
+int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+				  struct nfp_fl_key_ls *key_ls,
+				  struct net_device *netdev,
+				  struct nfp_fl_payload *nfp_flow)
+{
+	int err;
+	u8 *ext;
+	u8 *msk;
+
+	memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
+	memset(nfp_flow->mask_data, 0, key_ls->key_size);
+
+	ext = nfp_flow->unmasked_data;
+	msk = nfp_flow->mask_data;
+	if (NFP_FLOWER_LAYER_PORT & key_ls->key_layer) {
+		/* Populate Exact Metadata. */
+		nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)ext,
+					    flow, key_ls->key_layer, false);
+		/* Populate Mask Metadata. */
+		nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)msk,
+					    flow, key_ls->key_layer, true);
+		ext += sizeof(struct nfp_flower_meta_two);
+		msk += sizeof(struct nfp_flower_meta_two);
+
+		/* Populate Exact Port data. */
+		err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
+					      nfp_repr_get_port_id(netdev),
+					      false);
+		if (err)
+			return err;
+
+		/* Populate Mask Port Data. */
+		err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
+					      nfp_repr_get_port_id(netdev),
+					      true);
+		if (err)
+			return err;
+
+		ext += sizeof(struct nfp_flower_in_port);
+		msk += sizeof(struct nfp_flower_in_port);
+	} else {
+		/* Populate Exact Metadata. */
+		nfp_flower_compile_meta((struct nfp_flower_meta_one *)ext,
+					key_ls->key_layer);
+		/* Populate Mask Metadata. */
+		nfp_flower_compile_meta((struct nfp_flower_meta_one *)msk,
+					key_ls->key_layer);
+		ext += sizeof(struct nfp_flower_meta_one);
+		msk += sizeof(struct nfp_flower_meta_one);
+	}
+
+	if (NFP_FLOWER_LAYER_META & key_ls->key_layer) {
+		/* Additional Metadata Fields.
+		 * Currently unsupported.
+		 */
+		return -EOPNOTSUPP;
+	}
+
+	if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
+		/* Populate Exact MAC Data. */
+		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
+				       flow, false);
+		/* Populate Mask MAC Data. */
+		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk,
+				       flow, true);
+		ext += sizeof(struct nfp_flower_mac_mpls);
+		msk += sizeof(struct nfp_flower_mac_mpls);
+	}
+
+	if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) {
+		/* Populate Exact TP Data. */
+		nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext,
+					 flow, false);
+		/* Populate Mask TP Data. */
+		nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk,
+					 flow, true);
+		ext += sizeof(struct nfp_flower_tp_ports);
+		msk += sizeof(struct nfp_flower_tp_ports);
+	}
+
+	if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) {
+		/* Populate Exact IPv4 Data. */
+		nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext,
+					flow, false);
+		/* Populate Mask IPv4 Data. */
+		nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk,
+					flow, true);
+		ext += sizeof(struct nfp_flower_ipv4);
+		msk += sizeof(struct nfp_flower_ipv4);
+	}
+
+	if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) {
+		/* Populate Exact IPv4 Data. */
+		nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext,
+					flow, false);
+		/* Populate Mask IPv4 Data. */
+		nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk,
+					flow, true);
+		ext += sizeof(struct nfp_flower_ipv6);
+		msk += sizeof(struct nfp_flower_ipv6);
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
new file mode 100644
index 000000000000..fec0ff2ca94f
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/vmalloc.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_app.h"
+
+struct nfp_mask_id_table {
+	struct hlist_node link;
+	u32 hash_key;
+	u32 ref_cnt;
+	u8 mask_id;
+};
+
+static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct circ_buf *ring;
+
+	ring = &priv->stats_ids.free_list;
+	/* Check if buffer is full. */
+	if (!CIRC_SPACE(ring->head, ring->tail, NFP_FL_STATS_ENTRY_RS *
+			NFP_FL_STATS_ELEM_RS -
+			NFP_FL_STATS_ELEM_RS + 1))
+		return -ENOBUFS;
+
+	memcpy(&ring->buf[ring->head], &stats_context_id, NFP_FL_STATS_ELEM_RS);
+	ring->head = (ring->head + NFP_FL_STATS_ELEM_RS) %
+		     (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+
+	return 0;
+}
+
+static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	u32 freed_stats_id, temp_stats_id;
+	struct circ_buf *ring;
+
+	ring = &priv->stats_ids.free_list;
+	freed_stats_id = NFP_FL_STATS_ENTRY_RS;
+	/* Check for unallocated entries first. */
+	if (priv->stats_ids.init_unalloc > 0) {
+		*stats_context_id = priv->stats_ids.init_unalloc - 1;
+		priv->stats_ids.init_unalloc--;
+		return 0;
+	}
+
+	/* Check if buffer is empty. */
+	if (ring->head == ring->tail) {
+		*stats_context_id = freed_stats_id;
+		return -ENOENT;
+	}
+
+	memcpy(&temp_stats_id, &ring->buf[ring->tail], NFP_FL_STATS_ELEM_RS);
+	*stats_context_id = temp_stats_id;
+	memcpy(&ring->buf[ring->tail], &freed_stats_id, NFP_FL_STATS_ELEM_RS);
+	ring->tail = (ring->tail + NFP_FL_STATS_ELEM_RS) %
+		     (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+
+	return 0;
+}
+
+/* Must be called with either RTNL or rcu_read_lock */
+struct nfp_fl_payload *
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload *flower_entry;
+
+	hash_for_each_possible_rcu(priv->flow_table, flower_entry, link,
+				   tc_flower_cookie)
+		if (flower_entry->tc_flower_cookie == tc_flower_cookie)
+			return flower_entry;
+
+	return NULL;
+}
+
+static void
+nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
+{
+	struct nfp_fl_payload *nfp_flow;
+	unsigned long flower_cookie;
+
+	flower_cookie = be64_to_cpu(stats->stats_cookie);
+
+	rcu_read_lock();
+	nfp_flow = nfp_flower_search_fl_table(app, flower_cookie);
+	if (!nfp_flow)
+		goto exit_rcu_unlock;
+
+	if (nfp_flow->meta.host_ctx_id != stats->stats_con_id)
+		goto exit_rcu_unlock;
+
+	spin_lock(&nfp_flow->lock);
+	nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count);
+	nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count);
+	nfp_flow->stats.used = jiffies;
+	spin_unlock(&nfp_flow->lock);
+
+exit_rcu_unlock:
+	rcu_read_unlock();
+}
+
+void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
+{
+	unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN;
+	struct nfp_fl_stats_frame *stats_frame;
+	unsigned char *msg;
+	int i;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+
+	stats_frame = (struct nfp_fl_stats_frame *)msg;
+	for (i = 0; i < msg_len / sizeof(*stats_frame); i++)
+		nfp_flower_update_stats(app, stats_frame + i);
+}
+
+static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct circ_buf *ring;
+	struct timespec64 now;
+
+	ring = &priv->mask_ids.mask_id_free_list;
+	/* Checking if buffer is full. */
+	if (CIRC_SPACE(ring->head, ring->tail, NFP_FLOWER_MASK_ENTRY_RS) == 0)
+		return -ENOBUFS;
+
+	memcpy(&ring->buf[ring->head], &mask_id, NFP_FLOWER_MASK_ELEMENT_RS);
+	ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) %
+		     (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS);
+
+	getnstimeofday64(&now);
+	priv->mask_ids.last_used[mask_id] = now;
+
+	return 0;
+}
+
+static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct timespec64 delta, now;
+	struct circ_buf *ring;
+	u8 temp_id, freed_id;
+
+	ring = &priv->mask_ids.mask_id_free_list;
+	freed_id = NFP_FLOWER_MASK_ENTRY_RS - 1;
+	/* Checking for unallocated entries first. */
+	if (priv->mask_ids.init_unallocated > 0) {
+		*mask_id = priv->mask_ids.init_unallocated;
+		priv->mask_ids.init_unallocated--;
+		return 0;
+	}
+
+	/* Checking if buffer is empty. */
+	if (ring->head == ring->tail)
+		goto err_not_found;
+
+	memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS);
+	*mask_id = temp_id;
+
+	getnstimeofday64(&now);
+	delta = timespec64_sub(now, priv->mask_ids.last_used[*mask_id]);
+
+	if (timespec64_to_ns(&delta) < NFP_FL_MASK_REUSE_TIME_NS)
+		goto err_not_found;
+
+	memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS);
+	ring->tail = (ring->tail + NFP_FLOWER_MASK_ELEMENT_RS) %
+		     (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS);
+
+	return 0;
+
+err_not_found:
+	*mask_id = freed_id;
+	return -ENOENT;
+}
+
+static int
+nfp_add_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_mask_id_table *mask_entry;
+	unsigned long hash_key;
+	u8 mask_id;
+
+	if (nfp_mask_alloc(app, &mask_id))
+		return -ENOENT;
+
+	mask_entry = kmalloc(sizeof(*mask_entry), GFP_KERNEL);
+	if (!mask_entry) {
+		nfp_release_mask_id(app, mask_id);
+		return -ENOMEM;
+	}
+
+	INIT_HLIST_NODE(&mask_entry->link);
+	mask_entry->mask_id = mask_id;
+	hash_key = jhash(mask_data, mask_len, priv->mask_id_seed);
+	mask_entry->hash_key = hash_key;
+	mask_entry->ref_cnt = 1;
+	hash_add(priv->mask_table, &mask_entry->link, hash_key);
+
+	return mask_id;
+}
+
+static struct nfp_mask_id_table *
+nfp_search_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_mask_id_table *mask_entry;
+	unsigned long hash_key;
+
+	hash_key = jhash(mask_data, mask_len, priv->mask_id_seed);
+
+	hash_for_each_possible(priv->mask_table, mask_entry, link, hash_key)
+		if (mask_entry->hash_key == hash_key)
+			return mask_entry;
+
+	return NULL;
+}
+
+static int
+nfp_find_in_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+	struct nfp_mask_id_table *mask_entry;
+
+	mask_entry = nfp_search_mask_table(app, mask_data, mask_len);
+	if (!mask_entry)
+		return -ENOENT;
+
+	mask_entry->ref_cnt++;
+
+	/* Casting u8 to int for later use. */
+	return mask_entry->mask_id;
+}
+
+static bool
+nfp_check_mask_add(struct nfp_app *app, char *mask_data, u32 mask_len,
+		   u8 *meta_flags, u8 *mask_id)
+{
+	int id;
+
+	id = nfp_find_in_mask_table(app, mask_data, mask_len);
+	if (id < 0) {
+		id = nfp_add_mask_table(app, mask_data, mask_len);
+		if (id < 0)
+			return false;
+		*meta_flags |= NFP_FL_META_FLAG_NEW_MASK;
+	}
+	*mask_id = id;
+
+	return true;
+}
+
+static bool
+nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
+		      u8 *meta_flags, u8 *mask_id)
+{
+	struct nfp_mask_id_table *mask_entry;
+
+	mask_entry = nfp_search_mask_table(app, mask_data, mask_len);
+	if (!mask_entry)
+		return false;
+
+	*mask_id = mask_entry->mask_id;
+	mask_entry->ref_cnt--;
+	if (!mask_entry->ref_cnt) {
+		hash_del(&mask_entry->link);
+		nfp_release_mask_id(app, *mask_id);
+		kfree(mask_entry);
+		if (meta_flags)
+			*meta_flags |= NFP_FL_META_FLAG_LAST_MASK;
+	}
+
+	return true;
+}
+
+int nfp_compile_flow_metadata(struct nfp_app *app,
+			      struct tc_cls_flower_offload *flow,
+			      struct nfp_fl_payload *nfp_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload *check_entry;
+	u8 new_mask_id;
+	u32 stats_cxt;
+
+	if (nfp_get_stats_entry(app, &stats_cxt))
+		return -ENOENT;
+
+	nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt);
+	nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
+
+	new_mask_id = 0;
+	if (!nfp_check_mask_add(app, nfp_flow->mask_data,
+				nfp_flow->meta.mask_len,
+				&nfp_flow->meta.flags, &new_mask_id)) {
+		if (nfp_release_stats_entry(app, stats_cxt))
+			return -EINVAL;
+		return -ENOENT;
+	}
+
+	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+	priv->flower_version++;
+
+	/* Update flow payload with mask ids. */
+	nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
+	nfp_flow->stats.pkts = 0;
+	nfp_flow->stats.bytes = 0;
+	nfp_flow->stats.used = jiffies;
+
+	check_entry = nfp_flower_search_fl_table(app, flow->cookie);
+	if (check_entry) {
+		if (nfp_release_stats_entry(app, stats_cxt))
+			return -EINVAL;
+
+		if (!nfp_check_mask_remove(app, nfp_flow->mask_data,
+					   nfp_flow->meta.mask_len,
+					   NULL, &new_mask_id))
+			return -EINVAL;
+
+		return -EEXIST;
+	}
+
+	return 0;
+}
+
+int nfp_modify_flow_metadata(struct nfp_app *app,
+			     struct nfp_fl_payload *nfp_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	u8 new_mask_id = 0;
+	u32 temp_ctx_id;
+
+	nfp_check_mask_remove(app, nfp_flow->mask_data,
+			      nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
+			      &new_mask_id);
+
+	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+	priv->flower_version++;
+
+	/* Update flow payload with mask ids. */
+	nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
+
+	/* Release the stats ctx id. */
+	temp_ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+
+	return nfp_release_stats_entry(app, temp_ctx_id);
+}
+
+int nfp_flower_metadata_init(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+
+	hash_init(priv->mask_table);
+	hash_init(priv->flow_table);
+	get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
+
+	/* Init ring buffer and unallocated mask_ids. */
+	priv->mask_ids.mask_id_free_list.buf =
+		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
+			      NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
+	if (!priv->mask_ids.mask_id_free_list.buf)
+		return -ENOMEM;
+
+	priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
+
+	/* Init timestamps for mask id*/
+	priv->mask_ids.last_used =
+		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
+			      sizeof(*priv->mask_ids.last_used), GFP_KERNEL);
+	if (!priv->mask_ids.last_used)
+		goto err_free_mask_id;
+
+	/* Init ring buffer and unallocated stats_ids. */
+	priv->stats_ids.free_list.buf =
+		vmalloc(NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+	if (!priv->stats_ids.free_list.buf)
+		goto err_free_last_used;
+
+	priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX;
+
+	return 0;
+
+err_free_last_used:
+	kfree(priv->stats_ids.free_list.buf);
+err_free_mask_id:
+	kfree(priv->mask_ids.mask_id_free_list.buf);
+	return -ENOMEM;
+}
+
+void nfp_flower_metadata_cleanup(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+
+	if (!priv)
+		return;
+
+	kfree(priv->mask_ids.mask_id_free_list.buf);
+	kfree(priv->mask_ids.last_used);
+	vfree(priv->stats_ids.free_list.buf);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
new file mode 100644
index 000000000000..4ad10bd5e139
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <net/devlink.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nsp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "../nfp_port.h"
+
+static int
+nfp_flower_xmit_flow(struct net_device *netdev,
+		     struct nfp_fl_payload *nfp_flow, u8 mtype)
+{
+	u32 meta_len, key_len, mask_len, act_len, tot_len;
+	struct nfp_repr *priv = netdev_priv(netdev);
+	struct sk_buff *skb;
+	unsigned char *msg;
+
+	meta_len =  sizeof(struct nfp_fl_rule_metadata);
+	key_len = nfp_flow->meta.key_len;
+	mask_len = nfp_flow->meta.mask_len;
+	act_len = nfp_flow->meta.act_len;
+
+	tot_len = meta_len + key_len + mask_len + act_len;
+
+	/* Convert to long words as firmware expects
+	 * lengths in units of NFP_FL_LW_SIZ.
+	 */
+	nfp_flow->meta.key_len >>= NFP_FL_LW_SIZ;
+	nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ;
+	nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ;
+
+	skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype);
+	if (!skb)
+		return -ENOMEM;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	memcpy(msg, &nfp_flow->meta, meta_len);
+	memcpy(&msg[meta_len], nfp_flow->unmasked_data, key_len);
+	memcpy(&msg[meta_len + key_len], nfp_flow->mask_data, mask_len);
+	memcpy(&msg[meta_len + key_len + mask_len],
+	       nfp_flow->action_data, act_len);
+
+	/* Convert back to bytes as software expects
+	 * lengths in units of bytes.
+	 */
+	nfp_flow->meta.key_len <<= NFP_FL_LW_SIZ;
+	nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ;
+	nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ;
+
+	nfp_ctrl_tx(priv->app->ctrl, skb);
+
+	return 0;
+}
+
+static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
+{
+	return dissector_uses_key(f->dissector,
+				  FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
+		dissector_uses_key(f->dissector,
+				   FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
+		dissector_uses_key(f->dissector,
+				   FLOW_DISSECTOR_KEY_PORTS) ||
+		dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP);
+}
+
+static int
+nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
+				struct tc_cls_flower_offload *flow)
+{
+	struct flow_dissector_key_control *mask_enc_ctl;
+	struct flow_dissector_key_basic *mask_basic;
+	struct flow_dissector_key_basic *key_basic;
+	u32 key_layer_two;
+	u8 key_layer;
+	int key_size;
+
+	mask_enc_ctl = skb_flow_dissector_target(flow->dissector,
+						 FLOW_DISSECTOR_KEY_ENC_CONTROL,
+						 flow->mask);
+
+	mask_basic = skb_flow_dissector_target(flow->dissector,
+					       FLOW_DISSECTOR_KEY_BASIC,
+					       flow->mask);
+
+	key_basic = skb_flow_dissector_target(flow->dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      flow->key);
+	key_layer_two = 0;
+	key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
+	key_size = sizeof(struct nfp_flower_meta_one) +
+		   sizeof(struct nfp_flower_in_port) +
+		   sizeof(struct nfp_flower_mac_mpls);
+
+	/* We are expecting a tunnel. For now we ignore offloading. */
+	if (mask_enc_ctl->addr_type)
+		return -EOPNOTSUPP;
+
+	if (mask_basic->n_proto) {
+		/* Ethernet type is present in the key. */
+		switch (key_basic->n_proto) {
+		case cpu_to_be16(ETH_P_IP):
+			key_layer |= NFP_FLOWER_LAYER_IPV4;
+			key_size += sizeof(struct nfp_flower_ipv4);
+			break;
+
+		case cpu_to_be16(ETH_P_IPV6):
+			key_layer |= NFP_FLOWER_LAYER_IPV6;
+			key_size += sizeof(struct nfp_flower_ipv6);
+			break;
+
+		/* Currently we do not offload ARP
+		 * because we rely on it to get to the host.
+		 */
+		case cpu_to_be16(ETH_P_ARP):
+			return -EOPNOTSUPP;
+
+		/* Will be included in layer 2. */
+		case cpu_to_be16(ETH_P_8021Q):
+			break;
+
+		default:
+			/* Other ethtype - we need check the masks for the
+			 * remainder of the key to ensure we can offload.
+			 */
+			if (nfp_flower_check_higher_than_mac(flow))
+				return -EOPNOTSUPP;
+			break;
+		}
+	}
+
+	if (mask_basic->ip_proto) {
+		/* Ethernet type is present in the key. */
+		switch (key_basic->ip_proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+		case IPPROTO_SCTP:
+		case IPPROTO_ICMP:
+		case IPPROTO_ICMPV6:
+			key_layer |= NFP_FLOWER_LAYER_TP;
+			key_size += sizeof(struct nfp_flower_tp_ports);
+			break;
+		default:
+			/* Other ip proto - we need check the masks for the
+			 * remainder of the key to ensure we can offload.
+			 */
+			return -EOPNOTSUPP;
+		}
+	}
+
+	ret_key_ls->key_layer = key_layer;
+	ret_key_ls->key_layer_two = key_layer_two;
+	ret_key_ls->key_size = key_size;
+
+	return 0;
+}
+
+static struct nfp_fl_payload *
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
+{
+	struct nfp_fl_payload *flow_pay;
+
+	flow_pay = kmalloc(sizeof(*flow_pay), GFP_KERNEL);
+	if (!flow_pay)
+		return NULL;
+
+	flow_pay->meta.key_len = key_layer->key_size;
+	flow_pay->unmasked_data = kmalloc(key_layer->key_size, GFP_KERNEL);
+	if (!flow_pay->unmasked_data)
+		goto err_free_flow;
+
+	flow_pay->meta.mask_len = key_layer->key_size;
+	flow_pay->mask_data = kmalloc(key_layer->key_size, GFP_KERNEL);
+	if (!flow_pay->mask_data)
+		goto err_free_unmasked;
+
+	flow_pay->action_data = kmalloc(NFP_FL_MAX_A_SIZ, GFP_KERNEL);
+	if (!flow_pay->action_data)
+		goto err_free_mask;
+
+	flow_pay->meta.flags = 0;
+	spin_lock_init(&flow_pay->lock);
+
+	return flow_pay;
+
+err_free_mask:
+	kfree(flow_pay->mask_data);
+err_free_unmasked:
+	kfree(flow_pay->unmasked_data);
+err_free_flow:
+	kfree(flow_pay);
+	return NULL;
+}
+
+/**
+ * nfp_flower_add_offload() - Adds a new flow to hardware.
+ * @app:	Pointer to the APP handle
+ * @netdev:	netdev structure.
+ * @flow:	TC flower classifier offload structure.
+ *
+ * Adds a new flow to the repeated hash structure and action payload.
+ *
+ * Return: negative value on error, 0 if configured successfully.
+ */
+static int
+nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
+		       struct tc_cls_flower_offload *flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload *flow_pay;
+	struct nfp_fl_key_ls *key_layer;
+	int err;
+
+	key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
+	if (!key_layer)
+		return -ENOMEM;
+
+	err = nfp_flower_calculate_key_layers(key_layer, flow);
+	if (err)
+		goto err_free_key_ls;
+
+	flow_pay = nfp_flower_allocate_new(key_layer);
+	if (!flow_pay) {
+		err = -ENOMEM;
+		goto err_free_key_ls;
+	}
+
+	err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay);
+	if (err)
+		goto err_destroy_flow;
+
+	err = nfp_flower_compile_action(flow, netdev, flow_pay);
+	if (err)
+		goto err_destroy_flow;
+
+	err = nfp_compile_flow_metadata(app, flow, flow_pay);
+	if (err)
+		goto err_destroy_flow;
+
+	err = nfp_flower_xmit_flow(netdev, flow_pay,
+				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+	if (err)
+		goto err_destroy_flow;
+
+	INIT_HLIST_NODE(&flow_pay->link);
+	flow_pay->tc_flower_cookie = flow->cookie;
+	hash_add_rcu(priv->flow_table, &flow_pay->link, flow->cookie);
+
+	/* Deallocate flow payload when flower rule has been destroyed. */
+	kfree(key_layer);
+
+	return 0;
+
+err_destroy_flow:
+	kfree(flow_pay->action_data);
+	kfree(flow_pay->mask_data);
+	kfree(flow_pay->unmasked_data);
+	kfree(flow_pay);
+err_free_key_ls:
+	kfree(key_layer);
+	return err;
+}
+
+/**
+ * nfp_flower_del_offload() - Removes a flow from hardware.
+ * @app:	Pointer to the APP handle
+ * @netdev:	netdev structure.
+ * @flow:	TC flower classifier offload structure
+ *
+ * Removes a flow from the repeated hash structure and clears the
+ * action payload.
+ *
+ * Return: negative value on error, 0 if removed successfully.
+ */
+static int
+nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
+		       struct tc_cls_flower_offload *flow)
+{
+	struct nfp_fl_payload *nfp_flow;
+	int err;
+
+	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+	if (!nfp_flow)
+		return -ENOENT;
+
+	err = nfp_modify_flow_metadata(app, nfp_flow);
+	if (err)
+		goto err_free_flow;
+
+	err = nfp_flower_xmit_flow(netdev, nfp_flow,
+				   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+	if (err)
+		goto err_free_flow;
+
+err_free_flow:
+	hash_del_rcu(&nfp_flow->link);
+	kfree(nfp_flow->action_data);
+	kfree(nfp_flow->mask_data);
+	kfree(nfp_flow->unmasked_data);
+	kfree_rcu(nfp_flow, rcu);
+	return err;
+}
+
+/**
+ * nfp_flower_get_stats() - Populates flow stats obtained from hardware.
+ * @app:	Pointer to the APP handle
+ * @flow:	TC flower classifier offload structure
+ *
+ * Populates a flow statistics structure which which corresponds to a
+ * specific flow.
+ *
+ * Return: negative value on error, 0 if stats populated successfully.
+ */
+static int
+nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow)
+{
+	struct nfp_fl_payload *nfp_flow;
+
+	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+	if (!nfp_flow)
+		return -EINVAL;
+
+	spin_lock_bh(&nfp_flow->lock);
+	tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes,
+			      nfp_flow->stats.pkts, nfp_flow->stats.used);
+
+	nfp_flow->stats.pkts = 0;
+	nfp_flow->stats.bytes = 0;
+	spin_unlock_bh(&nfp_flow->lock);
+
+	return 0;
+}
+
+static int
+nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
+			struct tc_cls_flower_offload *flower)
+{
+	switch (flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return nfp_flower_add_offload(app, netdev, flower);
+	case TC_CLSFLOWER_DESTROY:
+		return nfp_flower_del_offload(app, netdev, flower);
+	case TC_CLSFLOWER_STATS:
+		return nfp_flower_get_stats(app, flower);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+			u32 handle, __be16 proto, struct tc_to_netdev *tc)
+{
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+		return -EOPNOTSUPP;
+
+	if (!eth_proto_is_802_3(proto))
+		return -EOPNOTSUPP;
+
+	if (tc->type != TC_SETUP_CLSFLOWER)
+		return -EINVAL;
+
+	return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 5620de05c996..c704c022574f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -43,7 +43,9 @@
 static const struct nfp_app_type *apps[] = {
 	&app_nic,
 	&app_bpf,
+#ifdef CONFIG_NFP_APP_FLOWER
 	&app_flower,
+#endif
 };
 
 const char *nfp_app_mip_name(struct nfp_app *app)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index ae2d02753d1a..5d714e10d9a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -47,6 +47,7 @@ struct sk_buff;
 struct nfp_app;
 struct nfp_cpp;
 struct nfp_pf;
+struct nfp_repr;
 struct nfp_net;
 
 enum nfp_app_id {
@@ -66,10 +67,13 @@ extern const struct nfp_app_type app_flower;
  * @ctrl_has_meta:  control messages have prepend of type:5/port:CTRL
  *
  * Callbacks
- * @init:	perform basic app checks
+ * @init:	perform basic app checks and init
+ * @clean:	clean app state
  * @extra_cap:	extra capabilities string
  * @vnic_init:	init vNICs (assign port types, etc.)
  * @vnic_clean:	clean up app's vNIC state
+ * @repr_open:	representor netdev open callback
+ * @repr_stop:	representor netdev stop callback
  * @start:	start application logic
  * @stop:	stop application logic
  * @ctrl_msg_rx:    control message handler
@@ -88,6 +92,7 @@ struct nfp_app_type {
 	bool ctrl_has_meta;
 
 	int (*init)(struct nfp_app *app);
+	void (*clean)(struct nfp_app *app);
 
 	const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn);
 
@@ -95,6 +100,9 @@ struct nfp_app_type {
 			 unsigned int id);
 	void (*vnic_clean)(struct nfp_app *app, struct nfp_net *nn);
 
+	int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr);
+	int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr);
+
 	int (*start)(struct nfp_app *app);
 	void (*stop)(struct nfp_app *app);
 
@@ -144,6 +152,12 @@ static inline int nfp_app_init(struct nfp_app *app)
 	return app->type->init(app);
 }
 
+static inline void nfp_app_clean(struct nfp_app *app)
+{
+	if (app->type->clean)
+		app->type->clean(app);
+}
+
 static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn,
 				    unsigned int id)
 {
@@ -156,6 +170,20 @@ static inline void nfp_app_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
 		app->type->vnic_clean(app, nn);
 }
 
+static inline int nfp_app_repr_open(struct nfp_app *app, struct nfp_repr *repr)
+{
+	if (!app->type->repr_open)
+		return -EINVAL;
+	return app->type->repr_open(app, repr);
+}
+
+static inline int nfp_app_repr_stop(struct nfp_app *app, struct nfp_repr *repr)
+{
+	if (!app->type->repr_stop)
+		return -EINVAL;
+	return app->type->repr_stop(app, repr);
+}
+
 static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
 {
 	app->ctrl = ctrl;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 748e54cc885e..d67969d3e484 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -107,17 +107,18 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
 		goto err_unlock;
 	}
 
-	err = nfp_app_sriov_enable(pf->app, num_vfs);
+	err = pci_enable_sriov(pdev, num_vfs);
 	if (err) {
-		dev_warn(&pdev->dev, "App specific PCI sriov configuration failed: %d\n",
-			 err);
+		dev_warn(&pdev->dev, "Failed to enable PCI SR-IOV: %d\n", err);
 		goto err_unlock;
 	}
 
-	err = pci_enable_sriov(pdev, num_vfs);
+	err = nfp_app_sriov_enable(pf->app, num_vfs);
 	if (err) {
-		dev_warn(&pdev->dev, "Failed to enable PCI sriov: %d\n", err);
-		goto err_app_sriov_disable;
+		dev_warn(&pdev->dev,
+			 "App specific PCI SR-IOV configuration failed: %d\n",
+			 err);
+		goto err_sriov_disable;
 	}
 
 	pf->num_vfs = num_vfs;
@@ -127,8 +128,8 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
 	mutex_unlock(&pf->lock);
 	return num_vfs;
 
-err_app_sriov_disable:
-	nfp_app_sriov_disable(pf->app);
+err_sriov_disable:
+	pci_disable_sriov(pdev);
 err_unlock:
 	mutex_unlock(&pf->lock);
 	return err;
@@ -136,17 +137,20 @@ err_unlock:
 	return 0;
 }
 
-static int __nfp_pcie_sriov_disable(struct pci_dev *pdev)
+static int nfp_pcie_sriov_disable(struct pci_dev *pdev)
 {
 #ifdef CONFIG_PCI_IOV
 	struct nfp_pf *pf = pci_get_drvdata(pdev);
 
+	mutex_lock(&pf->lock);
+
 	/* If the VFs are assigned we cannot shut down SR-IOV without
 	 * causing issues, so just leave the hardware available but
 	 * disabled
 	 */
 	if (pci_vfs_assigned(pdev)) {
 		dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n");
+		mutex_unlock(&pf->lock);
 		return -EPERM;
 	}
 
@@ -156,20 +160,10 @@ static int __nfp_pcie_sriov_disable(struct pci_dev *pdev)
 
 	pci_disable_sriov(pdev);
 	dev_dbg(&pdev->dev, "Removed VFs.\n");
-#endif
-	return 0;
-}
-
-static int nfp_pcie_sriov_disable(struct pci_dev *pdev)
-{
-	struct nfp_pf *pf = pci_get_drvdata(pdev);
-	int err;
 
-	mutex_lock(&pf->lock);
-	err = __nfp_pcie_sriov_disable(pdev);
 	mutex_unlock(&pf->lock);
-
-	return err;
+#endif
+	return 0;
 }
 
 static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -382,6 +376,12 @@ static int nfp_pci_probe(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, pf);
 	pf->pdev = pdev;
 
+	pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev));
+	if (!pf->wq) {
+		err = -ENOMEM;
+		goto err_pci_priv_unset;
+	}
+
 	pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev);
 	if (IS_ERR_OR_NULL(pf->cpp)) {
 		err = PTR_ERR(pf->cpp);
@@ -414,6 +414,14 @@ static int nfp_pci_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_fw_unload;
 
+	pf->num_vfs = pci_num_vf(pdev);
+	if (pf->num_vfs > pf->limit_vfs) {
+		dev_err(&pdev->dev,
+			"Error: %d VFs already enabled, but loaded FW can only support %d\n",
+			pf->num_vfs, pf->limit_vfs);
+		goto err_fw_unload;
+	}
+
 	err = nfp_net_pci_probe(pf);
 	if (err)
 		goto err_sriov_unlimit;
@@ -443,6 +451,8 @@ err_hwinfo_free:
 	kfree(pf->hwinfo);
 	nfp_cpp_free(pf->cpp);
 err_disable_msix:
+	destroy_workqueue(pf->wq);
+err_pci_priv_unset:
 	pci_set_drvdata(pdev, NULL);
 	mutex_destroy(&pf->lock);
 	devlink_free(devlink);
@@ -463,11 +473,11 @@ static void nfp_pci_remove(struct pci_dev *pdev)
 
 	devlink = priv_to_devlink(pf);
 
+	nfp_net_pci_remove(pf);
+
 	nfp_pcie_sriov_disable(pdev);
 	pci_sriov_set_totalvfs(pf->pdev, 0);
 
-	nfp_net_pci_remove(pf);
-
 	devlink_unregister(devlink);
 
 	kfree(pf->rtbl);
@@ -475,6 +485,7 @@ static void nfp_pci_remove(struct pci_dev *pdev)
 	if (pf->fw_loaded)
 		nfp_fw_unload(pf);
 
+	destroy_workqueue(pf->wq);
 	pci_set_drvdata(pdev, NULL);
 	kfree(pf->hwinfo);
 	nfp_cpp_free(pf->cpp);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index edc14dc78674..a08cfba7e68e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -89,6 +89,7 @@ struct nfp_rtsym_table;
  * @num_vnics:		Number of vNICs spawned
  * @vnics:		Linked list of vNIC structures (struct nfp_net)
  * @ports:		Linked list of port structures (struct nfp_port)
+ * @wq:			Workqueue for running works which need to grab @lock
  * @port_refresh_work:	Work entry for taking netdevs out
  * @lock:		Protects all fields which may change after probe
  */
@@ -131,7 +132,10 @@ struct nfp_pf {
 
 	struct list_head vnics;
 	struct list_head ports;
+
+	struct workqueue_struct *wq;
 	struct work_struct port_refresh_work;
+
 	struct mutex lock;
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 2e728543e840..30f82b41d400 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -64,6 +64,7 @@
 #include <linux/vmalloc.h>
 #include <linux/ktime.h>
 
+#include <net/switchdev.h>
 #include <net/vxlan.h>
 
 #include "nfpcore/nfp_nsp.h"
@@ -3096,18 +3097,6 @@ static void nfp_net_stat64(struct net_device *netdev,
 	}
 }
 
-static int
-nfp_net_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
-		 __be16 proto, struct tc_to_netdev *tc)
-{
-	struct nfp_net *nn = netdev_priv(netdev);
-
-	if (chain_index)
-		return -EOPNOTSUPP;
-
-	return nfp_app_setup_tc(nn->app, netdev, handle, proto, tc);
-}
-
 static int nfp_net_set_features(struct net_device *netdev,
 				netdev_features_t features)
 {
@@ -3423,7 +3412,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
 	.ndo_get_stats64	= nfp_net_stat64,
 	.ndo_vlan_rx_add_vid	= nfp_net_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= nfp_net_vlan_rx_kill_vid,
-	.ndo_setup_tc		= nfp_net_setup_tc,
+	.ndo_setup_tc		= nfp_port_setup_tc,
 	.ndo_tx_timeout		= nfp_net_tx_timeout,
 	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
 	.ndo_change_mtu		= nfp_net_change_mtu,
@@ -3703,6 +3692,8 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
 	netdev->netdev_ops = &nfp_net_netdev_ops;
 	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
 
+	SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+
 	/* MTU range: 68 - hw-specific max */
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = nn->max_mtu;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index cfcbc3b9a9aa..c85a2f18c4df 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -81,58 +81,6 @@ static int nfp_is_ready(struct nfp_pf *pf)
 }
 
 /**
- * nfp_net_map_area() - Help function to map an area
- * @cpp:    NFP CPP handler
- * @name:   Name for the area
- * @target: CPP target
- * @addr:   CPP address
- * @size:   Size of the area
- * @area:   Area handle (returned).
- *
- * This function is primarily to simplify the code in the main probe
- * function. To undo the effect of this functions call
- * @nfp_cpp_area_release_free(*area);
- *
- * Return: Pointer to memory mapped area or ERR_PTR
- */
-static u8 __iomem *nfp_net_map_area(struct nfp_cpp *cpp,
-				    const char *name, int isl, int target,
-				    unsigned long long addr, unsigned long size,
-				    struct nfp_cpp_area **area)
-{
-	u8 __iomem *res;
-	u32 dest;
-	int err;
-
-	dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, isl);
-
-	*area = nfp_cpp_area_alloc_with_name(cpp, dest, name, addr, size);
-	if (!*area) {
-		err = -EIO;
-		goto err_area;
-	}
-
-	err = nfp_cpp_area_acquire(*area);
-	if (err < 0)
-		goto err_acquire;
-
-	res = nfp_cpp_area_iomem(*area);
-	if (!res) {
-		err = -EIO;
-		goto err_map;
-	}
-
-	return res;
-
-err_map:
-	nfp_cpp_area_release(*area);
-err_acquire:
-	nfp_cpp_area_free(*area);
-err_area:
-	return (u8 __iomem *)ERR_PTR(err);
-}
-
-/**
  * nfp_net_get_mac_addr() - Get the MAC address.
  * @pf:       NFP PF handle
  * @port:     NFP port structure
@@ -226,31 +174,12 @@ static u8 __iomem *
 nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
 		     unsigned int min_size, struct nfp_cpp_area **area)
 {
-	const struct nfp_rtsym *sym;
 	char pf_symbol[256];
-	u8 __iomem *mem;
 
 	snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt,
 		 nfp_cppcore_pcie_unit(pf->cpp));
 
-	sym = nfp_rtsym_lookup(pf->rtbl, pf_symbol);
-	if (!sym)
-		return (u8 __iomem *)ERR_PTR(-ENOENT);
-
-	if (sym->size < min_size) {
-		nfp_err(pf->cpp, "PF symbol %s too small\n", pf_symbol);
-		return (u8 __iomem *)ERR_PTR(-EINVAL);
-	}
-
-	mem = nfp_net_map_area(pf->cpp, name, sym->domain, sym->target,
-			       sym->addr, sym->size, area);
-	if (IS_ERR(mem)) {
-		nfp_err(pf->cpp, "Failed to map PF symbol %s: %ld\n",
-			pf_symbol, PTR_ERR(mem));
-		return mem;
-	}
-
-	return mem;
+	return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area);
 }
 
 static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
@@ -485,7 +414,7 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
 	if (IS_ERR(ctrl_bar)) {
 		nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
 		err = PTR_ERR(ctrl_bar);
-		goto err_free;
+		goto err_app_clean;
 	}
 
 	pf->ctrl_vnic =	nfp_net_pf_alloc_vnic(pf, false, ctrl_bar, qc_bar,
@@ -499,8 +428,11 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
 
 err_unmap:
 	nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
+err_app_clean:
+	nfp_app_clean(pf->app);
 err_free:
 	nfp_app_free(pf->app);
+	pf->app = NULL;
 	return err;
 }
 
@@ -510,6 +442,7 @@ static void nfp_net_pf_app_clean(struct nfp_pf *pf)
 		nfp_net_pf_free_vnic(pf, pf->ctrl_vnic);
 		nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
 	}
+	nfp_app_clean(pf->app);
 	nfp_app_free(pf->app);
 	pf->app = NULL;
 }
@@ -555,8 +488,16 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
 	if (err)
 		goto err_ctrl_stop;
 
+	if (pf->num_vfs) {
+		err = nfp_app_sriov_enable(pf->app, pf->num_vfs);
+		if (err)
+			goto err_app_stop;
+	}
+
 	return 0;
 
+err_app_stop:
+	nfp_app_stop(pf->app);
 err_ctrl_stop:
 	nfp_net_pf_app_stop_ctrl(pf);
 	return err;
@@ -564,6 +505,8 @@ err_ctrl_stop:
 
 static void nfp_net_pf_app_stop(struct nfp_pf *pf)
 {
+	if (pf->num_vfs)
+		nfp_app_sriov_disable(pf->app);
 	nfp_app_stop(pf->app);
 	nfp_net_pf_app_stop_ctrl(pf);
 }
@@ -580,26 +523,22 @@ static void nfp_net_pci_unmap_mem(struct nfp_pf *pf)
 
 static int nfp_net_pci_map_mem(struct nfp_pf *pf)
 {
-	u32 ctrl_bar_sz;
 	u8 __iomem *mem;
+	u32 min_size;
 	int err;
 
-	ctrl_bar_sz = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
+	min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
 	mem = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%d_net_bar0",
-				   ctrl_bar_sz, &pf->data_vnic_bar);
+				   min_size, &pf->data_vnic_bar);
 	if (IS_ERR(mem)) {
 		nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
-		err = PTR_ERR(mem);
-		if (!pf->fw_loaded && err == -ENOENT)
-			err = -EPROBE_DEFER;
-		return err;
+		return PTR_ERR(mem);
 	}
 
-	pf->mac_stats_mem = nfp_net_pf_map_rtsym(pf, "net.macstats",
-						 "_mac_stats",
-						 NFP_MAC_STATS_SIZE *
-						 (pf->eth_tbl->max_index + 1),
-						 &pf->mac_stats_bar);
+	min_size =  NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
+	pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
+					  "net.macstats", min_size,
+					  &pf->mac_stats_bar);
 	if (IS_ERR(pf->mac_stats_mem)) {
 		if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
 			err = PTR_ERR(pf->mac_stats_mem);
@@ -620,7 +559,7 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
 		pf->vf_cfg_mem = NULL;
 	}
 
-	mem = nfp_net_map_area(pf->cpp, "net.qc", 0, 0,
+	mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0,
 			       NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ,
 			       &pf->qc_area);
 	if (IS_ERR(mem)) {
@@ -743,7 +682,7 @@ void nfp_net_refresh_port_table(struct nfp_port *port)
 
 	set_bit(NFP_PORT_CHANGED, &port->flags);
 
-	schedule_work(&pf->port_refresh_work);
+	queue_work(pf->wq, &pf->port_refresh_work);
 }
 
 int nfp_net_refresh_eth_port(struct nfp_port *port)
@@ -786,6 +725,12 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
 		return -EINVAL;
 	}
 
+	if (!pf->rtbl) {
+		nfp_err(pf->cpp, "No %s, giving up.\n",
+			pf->fw_loaded ? "symbol table" : "firmware found");
+		return -EPROBE_DEFER;
+	}
+
 	mutex_lock(&pf->lock);
 	pf->max_data_vnics = nfp_net_pf_get_num_ports(pf);
 	if ((int)pf->max_data_vnics < 0) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 44adcc5df11e..8ec5474f4b18 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -35,8 +35,10 @@
 #include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/lockdep.h>
 #include <net/dst_metadata.h>
+#include <net/switchdev.h>
 
 #include "nfpcore/nfp_cpp.h"
+#include "nfpcore/nfp_nsp.h"
 #include "nfp_app.h"
 #include "nfp_main.h"
 #include "nfp_net_ctrl.h"
@@ -135,25 +137,34 @@ nfp_repr_pf_get_stats64(const struct nfp_app *app, u8 pf,
 	stats->rx_dropped = readq(mem + NFP_NET_CFG_STATS_TX_DISCARDS);
 }
 
-void
-nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type,
-		     u8 port, struct rtnl_link_stats64 *stats)
+static void
+nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
 {
-	switch (type) {
-	case NFP_REPR_TYPE_PHYS_PORT:
-		nfp_repr_phy_port_get_stats64(app, port, stats);
+	struct nfp_repr *repr = netdev_priv(netdev);
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_app *app = repr->app;
+
+	if (WARN_ON(!repr->port))
+		return;
+
+	switch (repr->port->type) {
+	case NFP_PORT_PHYS_PORT:
+		eth_port = __nfp_port_get_eth_port(repr->port);
+		if (!eth_port)
+			break;
+		nfp_repr_phy_port_get_stats64(app, eth_port->index, stats);
 		break;
-	case NFP_REPR_TYPE_PF:
-		nfp_repr_pf_get_stats64(app, port, stats);
+	case NFP_PORT_PF_PORT:
+		nfp_repr_pf_get_stats64(app, repr->port->pf_id, stats);
 		break;
-	case NFP_REPR_TYPE_VF:
-		nfp_repr_vf_get_stats64(app, port, stats);
+	case NFP_PORT_VF_PORT:
+		nfp_repr_vf_get_stats64(app, repr->port->vf_id, stats);
 	default:
 		break;
 	}
 }
 
-bool
+static bool
 nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id)
 {
 	switch (attr_id) {
@@ -196,8 +207,9 @@ nfp_repr_get_host_stats64(const struct net_device *netdev,
 	return 0;
 }
 
-int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
-			       void *stats)
+static int
+nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
+			   void *stats)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -207,7 +219,7 @@ int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
 	return -EINVAL;
 }
 
-netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
 	unsigned int len = skb->len;
@@ -224,6 +236,31 @@ netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return ret;
 }
 
+static int nfp_repr_stop(struct net_device *netdev)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+
+	return nfp_app_repr_stop(repr->app, repr);
+}
+
+static int nfp_repr_open(struct net_device *netdev)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+
+	return nfp_app_repr_open(repr->app, repr);
+}
+
+const struct net_device_ops nfp_repr_netdev_ops = {
+	.ndo_open		= nfp_repr_open,
+	.ndo_stop		= nfp_repr_stop,
+	.ndo_start_xmit		= nfp_repr_xmit,
+	.ndo_get_stats64	= nfp_repr_get_stats64,
+	.ndo_has_offload_stats	= nfp_repr_has_offload_stats,
+	.ndo_get_offload_stats	= nfp_repr_get_offload_stats,
+	.ndo_get_phys_port_name	= nfp_port_get_phys_port_name,
+	.ndo_setup_tc		= nfp_port_setup_tc,
+};
+
 static void nfp_repr_clean(struct nfp_repr *repr)
 {
 	unregister_netdev(repr->netdev);
@@ -248,8 +285,8 @@ static void nfp_repr_set_lockdep_class(struct net_device *dev)
 }
 
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
-		  const struct net_device_ops *netdev_ops, u32 cmsg_port_id,
-		  struct nfp_port *port, struct net_device *pf_netdev)
+		  u32 cmsg_port_id, struct nfp_port *port,
+		  struct net_device *pf_netdev)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
 	int err;
@@ -263,7 +300,13 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 	repr->dst->u.port_info.port_id = cmsg_port_id;
 	repr->dst->u.port_info.lower_dev = pf_netdev;
 
-	netdev->netdev_ops = netdev_ops;
+	netdev->netdev_ops = &nfp_repr_netdev_ops;
+	SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+
+	if (nfp_app_has_tc(app)) {
+		netdev->features |= NETIF_F_HW_TC;
+		netdev->hw_features |= NETIF_F_HW_TC;
+	}
 
 	err = register_netdev(netdev);
 	if (err)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index c5ed6611f708..32179cad062a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -38,6 +38,8 @@ struct metadata_dst;
 struct nfp_net;
 struct nfp_port;
 
+#include <net/dst_metadata.h>
+
 /**
  * struct nfp_reprs - container for representor netdevs
  * @num_reprs:	Number of elements in reprs array
@@ -97,16 +99,22 @@ enum nfp_repr_type {
 };
 #define NFP_REPR_TYPE_MAX (__NFP_REPR_TYPE_MAX - 1)
 
+extern const struct net_device_ops nfp_repr_netdev_ops;
+
+static inline bool nfp_netdev_is_nfp_repr(struct net_device *netdev)
+{
+	return netdev->netdev_ops == &nfp_repr_netdev_ops;
+}
+
+static inline int nfp_repr_get_port_id(struct net_device *netdev)
+{
+	struct nfp_repr *priv = netdev_priv(netdev);
+
+	return priv->dst->u.port_info.port_id;
+}
+
 void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
-void
-nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type,
-		     u8 port, struct rtnl_link_stats64 *stats);
-bool nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id);
-int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
-			       void *stats);
-netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev);
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
-		  const struct net_device_ops *netdev_ops,
 		  u32 cmsg_port_id, struct nfp_port *port,
 		  struct net_device *pf_netdev);
 struct net_device *nfp_repr_alloc(struct nfp_app *app);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 19bceeb82225..776e54dd5dd0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -32,6 +32,7 @@
  */
 
 #include <linux/lockdep.h>
+#include <net/switchdev.h>
 
 #include "nfpcore/nfp_cpp.h"
 #include "nfpcore/nfp_nsp.h"
@@ -42,13 +43,64 @@
 
 struct nfp_port *nfp_port_from_netdev(struct net_device *netdev)
 {
-	struct nfp_net *nn;
+	if (nfp_netdev_is_nfp_net(netdev)) {
+		struct nfp_net *nn = netdev_priv(netdev);
 
-	if (WARN_ON(!nfp_netdev_is_nfp_net(netdev)))
-		return NULL;
-	nn = netdev_priv(netdev);
+		return nn->port;
+	}
+
+	if (nfp_netdev_is_nfp_repr(netdev)) {
+		struct nfp_repr *repr = netdev_priv(netdev);
+
+		return repr->port;
+	}
 
-	return nn->port;
+	WARN(1, "Unknown netdev type for nfp_port\n");
+
+	return NULL;
+}
+
+static int
+nfp_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr)
+{
+	struct nfp_port *port;
+
+	port = nfp_port_from_netdev(netdev);
+	if (!port)
+		return -EOPNOTSUPP;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: {
+		const u8 *serial;
+		/* N.B: attr->u.ppid.id is binary data */
+		attr->u.ppid.id_len = nfp_cpp_serial(port->app->cpp, &serial);
+		memcpy(&attr->u.ppid.id, serial, attr->u.ppid.id_len);
+		break;
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+const struct switchdev_ops nfp_port_switchdev_ops = {
+	.switchdev_port_attr_get	= nfp_port_attr_get,
+};
+
+int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+		      __be16 proto, struct tc_to_netdev *tc)
+{
+	struct nfp_port *port;
+
+	if (chain_index)
+		return -EOPNOTSUPP;
+
+	port = nfp_port_from_netdev(netdev);
+	if (!port)
+		return -EOPNOTSUPP;
+
+	return nfp_app_setup_tc(port->app, netdev, handle, proto, tc);
 }
 
 struct nfp_port *
@@ -98,15 +150,31 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
 	int n;
 
 	port = nfp_port_from_netdev(netdev);
-	eth_port = __nfp_port_get_eth_port(port);
-	if (!eth_port)
+	if (!port)
+		return -EOPNOTSUPP;
+
+	switch (port->type) {
+	case NFP_PORT_PHYS_PORT:
+		eth_port = __nfp_port_get_eth_port(port);
+		if (!eth_port)
+			return -EOPNOTSUPP;
+
+		if (!eth_port->is_split)
+			n = snprintf(name, len, "p%d", eth_port->label_port);
+		else
+			n = snprintf(name, len, "p%ds%d", eth_port->label_port,
+				     eth_port->label_subport);
+		break;
+	case NFP_PORT_PF_PORT:
+		n = snprintf(name, len, "pf%d", port->pf_id);
+		break;
+	case NFP_PORT_VF_PORT:
+		n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id);
+		break;
+	default:
 		return -EOPNOTSUPP;
+	}
 
-	if (!eth_port->is_split)
-		n = snprintf(name, len, "p%d", eth_port->label_port);
-	else
-		n = snprintf(name, len, "p%ds%d", eth_port->label_port,
-			     eth_port->label_subport);
 	if (n >= len)
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index f472bea4ec2b..a33d22e18f94 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -36,6 +36,7 @@
 
 #include <net/devlink.h>
 
+struct tc_to_netdev;
 struct net_device;
 struct nfp_app;
 struct nfp_pf;
@@ -47,10 +48,14 @@ struct nfp_port;
  *			state when port disappears because of FW fault or config
  *			change
  * @NFP_PORT_PHYS_PORT:	external NIC port
+ * @NFP_PORT_PF_PORT:	logical port of PCI PF
+ * @NFP_PORT_VF_PORT:	logical port of PCI VF
  */
 enum nfp_port_type {
 	NFP_PORT_INVALID,
 	NFP_PORT_PHYS_PORT,
+	NFP_PORT_PF_PORT,
+	NFP_PORT_VF_PORT,
 };
 
 /**
@@ -72,6 +77,8 @@ enum nfp_port_flags {
  * @dl_port:	devlink port structure
  * @eth_id:	for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
  * @eth_port:	for %NFP_PORT_PHYS_PORT translated ETH Table port entry
+ * @pf_id:	for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
+ * @vf_id:	for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
  * @port_list:	entry on pf's list of ports
  */
 struct nfp_port {
@@ -84,12 +91,27 @@ struct nfp_port {
 
 	struct devlink_port dl_port;
 
-	unsigned int eth_id;
-	struct nfp_eth_table_port *eth_port;
+	union {
+		/* NFP_PORT_PHYS_PORT */
+		struct {
+			unsigned int eth_id;
+			struct nfp_eth_table_port *eth_port;
+		};
+		/* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */
+		struct {
+			unsigned int pf_id;
+			unsigned int vf_id;
+		};
+	};
 
 	struct list_head port_list;
 };
 
+extern const struct switchdev_ops nfp_port_switchdev_ops;
+
+int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+		      __be16 proto, struct tc_to_netdev *tc);
+
 struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
 struct nfp_port *
 nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index 25a967158ce9..5798adc57cbc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -230,6 +230,9 @@ struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp,
 struct nfp_cpp_area *nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 cpp_id,
 					unsigned long long address,
 					unsigned long size);
+struct nfp_cpp_area *
+nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 cpp_id,
+			   unsigned long long address, unsigned long size);
 void nfp_cpp_area_free(struct nfp_cpp_area *area);
 int nfp_cpp_area_acquire(struct nfp_cpp_area *area);
 int nfp_cpp_area_acquire_nonblocking(struct nfp_cpp_area *area);
@@ -239,8 +242,6 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset,
 		      void *buffer, size_t length);
 int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset,
 		       const void *buffer, size_t length);
-int nfp_cpp_area_check_range(struct nfp_cpp_area *area,
-			     unsigned long long offset, unsigned long size);
 const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area);
 void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area);
 struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area);
@@ -278,6 +279,10 @@ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id,
 int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id,
 		   unsigned long long address, u64 value);
 
+u8 __iomem *
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
+		 u64 addr, unsigned long size, struct nfp_cpp_area **area);
+
 struct nfp_cpp_mutex;
 
 int nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 9b69dcf87be9..04dd5758ecf5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -361,6 +361,41 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
 }
 
 /**
+ * nfp_cpp_area_alloc_acquire() - allocate a new CPP area and lock it down
+ * @cpp:	CPP handle
+ * @name:	Name of region
+ * @dest:	CPP id
+ * @address:	Start address on CPP target
+ * @size:	Size of area
+ *
+ * Allocate and initialize a CPP area structure, and lock it down so
+ * that it can be accessed directly.
+ *
+ * NOTE: @address and @size must be 32-bit aligned values.
+ *
+ * NOTE: The area must also be 'released' when the structure is freed.
+ *
+ * Return: NFP CPP Area handle, or NULL
+ */
+struct nfp_cpp_area *
+nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 dest,
+			   unsigned long long address, unsigned long size)
+{
+	struct nfp_cpp_area *area;
+
+	area = nfp_cpp_area_alloc_with_name(cpp, dest, name, address, size);
+	if (!area)
+		return NULL;
+
+	if (nfp_cpp_area_acquire(area)) {
+		nfp_cpp_area_free(area);
+		return NULL;
+	}
+
+	return area;
+}
+
+/**
  * nfp_cpp_area_free() - free up the CPP area
  * @area:	CPP area handle
  *
@@ -536,27 +571,6 @@ int nfp_cpp_area_write(struct nfp_cpp_area *area,
 }
 
 /**
- * nfp_cpp_area_check_range() - check if address range fits in CPP area
- * @area:	CPP area handle
- * @offset:	offset into CPP target
- * @length:	size of address range in bytes
- *
- * Check if address range fits within CPP area.  Return 0 if area
- * fits or -EFAULT on error.
- *
- * Return: 0, or -ERRNO
- */
-int nfp_cpp_area_check_range(struct nfp_cpp_area *area,
-			     unsigned long long offset, unsigned long length)
-{
-	if (offset < area->offset ||
-	    offset + length > area->offset + area->size)
-		return -EFAULT;
-
-	return 0;
-}
-
-/**
  * nfp_cpp_area_name() - return name of a CPP area
  * @cpp_area:	CPP area handle
  *
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
index 0ba0379b8f75..ab86bceb93f2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
@@ -279,3 +279,43 @@ exit_release:
 
 	return err;
 }
+
+/**
+ * nfp_cpp_map_area() - Helper function to map an area
+ * @cpp:    NFP CPP handler
+ * @name:   Name for the area
+ * @domain: CPP domain
+ * @target: CPP target
+ * @addr:   CPP address
+ * @size:   Size of the area
+ * @area:   Area handle (output)
+ *
+ * Map an area of IOMEM access.  To undo the effect of this function call
+ * @nfp_cpp_area_release_free(*area).
+ *
+ * Return: Pointer to memory mapped area or ERR_PTR
+ */
+u8 __iomem *
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
+		 u64 addr, unsigned long size, struct nfp_cpp_area **area)
+{
+	u8 __iomem *res;
+	u32 dest;
+
+	dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, domain);
+
+	*area = nfp_cpp_area_alloc_acquire(cpp, name, dest, addr, size);
+	if (!*area)
+		goto err_eio;
+
+	res = nfp_cpp_area_iomem(*area);
+	if (!res)
+		goto err_release_free;
+
+	return res;
+
+err_release_free:
+	nfp_cpp_area_release_free(*area);
+err_eio:
+	return (u8 __iomem *)ERR_PTR(-EIO);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
index d27d29782a12..c9724fb7ea4b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
@@ -97,7 +97,11 @@ int nfp_rtsym_count(struct nfp_rtsym_table *rtbl);
 const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx);
 const struct nfp_rtsym *
 nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name);
+
 u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name,
 		      int *error);
+u8 __iomem *
+nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
+	      unsigned int min_size, struct nfp_cpp_area **area);
 
 #endif /* NFP_NFFW_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
index 203f9cbae0fb..ecda474ac7c3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
@@ -289,3 +289,30 @@ exit:
 		return ~0ULL;
 	return val;
 }
+
+u8 __iomem *
+nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
+	      unsigned int min_size, struct nfp_cpp_area **area)
+{
+	const struct nfp_rtsym *sym;
+	u8 __iomem *mem;
+
+	sym = nfp_rtsym_lookup(rtbl, name);
+	if (!sym)
+		return (u8 __iomem *)ERR_PTR(-ENOENT);
+
+	if (sym->size < min_size) {
+		nfp_err(rtbl->cpp, "Symbol %s too small\n", name);
+		return (u8 __iomem *)ERR_PTR(-EINVAL);
+	}
+
+	mem = nfp_cpp_map_area(rtbl->cpp, id, sym->domain, sym->target,
+			       sym->addr, sym->size, area);
+	if (IS_ERR(mem)) {
+		nfp_err(rtbl->cpp, "Failed to map symbol %s: %ld\n",
+			name, PTR_ERR(mem));
+		return mem;
+	}
+
+	return mem;
+}
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index e30676515529..6cec2a6a3dcc 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -174,7 +174,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter)
 {
 	int err = 0, i;
 	u32 *template, *tmp_buf;
-	struct netxen_minidump_template_hdr *hdr;
 	err = netxen_get_minidump_template_size(adapter);
 	if (err) {
 		adapter->mdump.fw_supports_md = 0;
@@ -218,8 +217,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter)
 	template = (u32 *) adapter->mdump.md_template;
 	for (i = 0; i < adapter->mdump.md_template_size/sizeof(u32); i++)
 		*template++ = __le32_to_cpu(*tmp_buf++);
-	hdr = (struct netxen_minidump_template_hdr *)
-				adapter->mdump.md_template;
 	adapter->mdump.md_capture_buff = NULL;
 	adapter->mdump.fw_supports_md = 1;
 	adapter->mdump.md_enabled = 0;
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 67452380b60e..82dd47068e18 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -5,6 +5,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 	 qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
-qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o
+qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
 qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
 qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 14b08ee9e3ad..91003bc6f00b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -210,14 +210,16 @@ struct qed_tunn_update_params {
 
 /* The PCI personality is not quite synonymous to protocol ID:
  * 1. All personalities need CORE connections
- * 2. The Ethernet personality may support also the RoCE protocol
+ * 2. The Ethernet personality may support also the RoCE/iWARP protocol
  */
 enum qed_pci_personality {
 	QED_PCI_ETH,
 	QED_PCI_FCOE,
 	QED_PCI_ISCSI,
 	QED_PCI_ETH_ROCE,
-	QED_PCI_DEFAULT /* default in shmem */
+	QED_PCI_ETH_IWARP,
+	QED_PCI_ETH_RDMA,
+	QED_PCI_DEFAULT, /* default in shmem */
 };
 
 /* All VFs are symmetric, all counters are PF + all VFs */
@@ -277,6 +279,7 @@ enum qed_dev_cap {
 	QED_DEV_CAP_FCOE,
 	QED_DEV_CAP_ISCSI,
 	QED_DEV_CAP_ROCE,
+	QED_DEV_CAP_IWARP,
 };
 
 enum qed_wol_support {
@@ -286,7 +289,24 @@ enum qed_wol_support {
 
 struct qed_hw_info {
 	/* PCI personality */
-	enum qed_pci_personality	personality;
+	enum qed_pci_personality personality;
+#define QED_IS_RDMA_PERSONALITY(dev)			    \
+	((dev)->hw_info.personality == QED_PCI_ETH_ROCE ||  \
+	 (dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+	 (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_ROCE_PERSONALITY(dev)			   \
+	((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \
+	 (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_IWARP_PERSONALITY(dev)			    \
+	((dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+	 (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_L2_PERSONALITY(dev)		      \
+	((dev)->hw_info.personality == QED_PCI_ETH || \
+	 QED_IS_RDMA_PERSONALITY(dev))
+#define QED_IS_FCOE_PERSONALITY(dev) \
+	((dev)->hw_info.personality == QED_PCI_FCOE)
+#define QED_IS_ISCSI_PERSONALITY(dev) \
+	((dev)->hw_info.personality == QED_PCI_ISCSI)
 
 	/* Resource Allocation scheme results */
 	u32				resc_start[QED_MAX_RESC];
@@ -759,7 +779,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
 }
 
 #define PURE_LB_TC 8
-#define OOO_LB_TC 9
+#define PKT_LB_TC 9
 
 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate);
 void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
@@ -769,6 +789,8 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
 void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 int qed_device_num_engines(struct qed_dev *cdev);
 int qed_device_get_port_id(struct qed_dev *cdev);
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
 
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index e201214764db..af106be8cc08 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -246,14 +246,16 @@ struct qed_cxt_mngr {
 static bool src_proto(enum protocol_type type)
 {
 	return type == PROTOCOLID_ISCSI ||
-	       type == PROTOCOLID_FCOE;
+	       type == PROTOCOLID_FCOE ||
+	       type == PROTOCOLID_IWARP;
 }
 
 static bool tm_cid_proto(enum protocol_type type)
 {
 	return type == PROTOCOLID_ISCSI ||
 	       type == PROTOCOLID_FCOE ||
-	       type == PROTOCOLID_ROCE;
+	       type == PROTOCOLID_ROCE ||
+	       type == PROTOCOLID_IWARP;
 }
 
 static bool tm_tid_proto(enum protocol_type type)
@@ -853,7 +855,7 @@ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines)
 	if (!excess_lines)
 		return 0;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+	if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
 		return 0;
 
 	p_mngr = p_hwfn->p_cxt_mngr;
@@ -1033,7 +1035,7 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn,
 	u32 lines, line, sz_left, lines_to_skip = 0;
 
 	/* Special handling for RoCE that supports dynamic allocation */
-	if ((p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) &&
+	if (QED_IS_RDMA_PERSONALITY(p_hwfn) &&
 	    ((ilt_client == ILT_CLI_CDUT) || ilt_client == ILT_CLI_TSDM))
 		return 0;
 
@@ -1833,7 +1835,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn)
 		tm_offset += tm_iids.pf_tids[i];
 	}
 
-	if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE)
+	if (QED_IS_RDMA_PERSONALITY(p_hwfn))
 		active_seg_mask = 0;
 
 	STORE_RT_REG(p_hwfn, TM_REG_PF_ENABLE_TASK_RT_OFFSET, active_seg_mask);
@@ -2068,6 +2070,11 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
 	num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs);
 
 	switch (p_hwfn->hw_info.personality) {
+	case QED_PCI_ETH_IWARP:
+		/* Each QP requires one connection */
+		num_cons = min_t(u32, IWARP_MAX_QPS, p_params->num_qps);
+		proto = PROTOCOLID_IWARP;
+		break;
 	case QED_PCI_ETH_ROCE:
 		num_qps = min_t(u32, ROCE_MAX_QPS, p_params->num_qps);
 		num_cons = num_qps * 2;	/* each QP requires two connections */
@@ -2103,6 +2110,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 	qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0);
 
 	switch (p_hwfn->hw_info.personality) {
+	case QED_PCI_ETH_RDMA:
+	case QED_PCI_ETH_IWARP:
 	case QED_PCI_ETH_ROCE:
 	{
 			qed_rdma_set_pf_params(p_hwfn,
@@ -2344,7 +2353,7 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
 		       last_cid_allocated - 1);
 
 		if (!p_hwfn->b_rdma_enabled_in_prs) {
-			/* Enable RoCE search */
+			/* Enable RDMA search */
 			qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
 			p_hwfn->b_rdma_enabled_in_prs = true;
 		}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 49667ad9042d..6c87bed13bd2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -216,6 +216,10 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
 	case QED_PCI_ETH_ROCE:
 		flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT;
 		break;
+	case QED_PCI_ETH_IWARP:
+		flags |= PQ_FLAGS_MCOS | PQ_FLAGS_ACK | PQ_FLAGS_OOO |
+		    PQ_FLAGS_OFLD;
+		break;
 	default:
 		DP_ERR(p_hwfn,
 		       "unknown personality %d\n", p_hwfn->hw_info.personality);
@@ -936,9 +940,16 @@ int qed_resc_alloc(struct qed_dev *cdev)
 
 		/* EQ */
 		n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain);
-		if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+		if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
+			enum protocol_type rdma_proto;
+
+			if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+				rdma_proto = PROTOCOLID_ROCE;
+			else
+				rdma_proto = PROTOCOLID_IWARP;
+
 			num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
-							       PROTOCOLID_ROCE,
+							       rdma_proto,
 							       NULL) * 2;
 			n_eqes += num_cons + 2 * MAX_NUM_VFS_BB;
 		} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
@@ -2057,7 +2068,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 	qed_int_get_num_sbs(p_hwfn, &sb_cnt);
 
 	if (IS_ENABLED(CONFIG_QED_RDMA) &&
-	    p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+	    QED_IS_RDMA_PERSONALITY(p_hwfn)) {
 		/* Roce CNQ each requires: 1 status block + 1 CNQ. We divide
 		 * the status blocks equally between L2 / RoCE but with
 		 * consideration as to how many l2 queues / cnqs we have.
@@ -2068,9 +2079,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 
 		non_l2_sbs = feat_num[QED_RDMA_CNQ];
 	}
-
-	if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE ||
-	    p_hwfn->hw_info.personality == QED_PCI_ETH) {
+	if (QED_IS_L2_PERSONALITY(p_hwfn)) {
 		/* Start by allocating VF queues, then PF's */
 		feat_num[QED_VF_L2_QUE] = min_t(u32,
 						RESC_NUM(p_hwfn, QED_L2_QUEUE),
@@ -2083,12 +2092,12 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 							 QED_VF_L2_QUE));
 	}
 
-	if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
+	if (QED_IS_FCOE_PERSONALITY(p_hwfn))
 		feat_num[QED_FCOE_CQ] =  min_t(u32, sb_cnt.cnt,
 					       RESC_NUM(p_hwfn,
 							QED_CMDQS_CQS));
 
-	if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
 		feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt,
 					       RESC_NUM(p_hwfn,
 							QED_CMDQS_CQS));
@@ -4122,3 +4131,14 @@ int qed_device_get_port_id(struct qed_dev *cdev)
 {
 	return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev);
 }
+
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac)
+{
+	((u8 *)fw_msb)[0] = mac[1];
+	((u8 *)fw_msb)[1] = mac[0];
+	((u8 *)fw_mid)[0] = mac[3];
+	((u8 *)fw_mid)[1] = mac[2];
+	((u8 *)fw_lsb)[0] = mac[5];
+	((u8 *)fw_lsb)[1] = mac[4];
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 3bf3614b3084..31fb0bffa098 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -46,6 +46,7 @@
 #include <linux/qed/fcoe_common.h>
 #include <linux/qed/eth_common.h>
 #include <linux/qed/iscsi_common.h>
+#include <linux/qed/iwarp_common.h>
 #include <linux/qed/rdma_common.h>
 #include <linux/qed/roce_common.h>
 #include <linux/qed/qed_fcoe_if.h>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
new file mode 100644
index 000000000000..5cd20da2d4e0
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -0,0 +1,2409 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/spinlock.h>
+#include <linux/tcp.h>
+#include "qed_cxt.h"
+#include "qed_hw.h"
+#include "qed_ll2.h"
+#include "qed_rdma.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+
+#define QED_IWARP_ORD_DEFAULT		32
+#define QED_IWARP_IRD_DEFAULT		32
+#define QED_IWARP_MAX_FW_MSS		4120
+
+#define QED_EP_SIG 0xecabcdef
+
+struct mpa_v2_hdr {
+	__be16 ird;
+	__be16 ord;
+};
+
+#define MPA_V2_PEER2PEER_MODEL  0x8000
+#define MPA_V2_SEND_RTR         0x4000	/* on ird */
+#define MPA_V2_READ_RTR         0x4000	/* on ord */
+#define MPA_V2_WRITE_RTR        0x8000
+#define MPA_V2_IRD_ORD_MASK     0x3FFF
+
+#define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
+
+#define QED_IWARP_INVALID_TCP_CID	0xffffffff
+#define QED_IWARP_RCV_WND_SIZE_DEF	(256 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_MIN	(64 * 1024)
+#define TIMESTAMP_HEADER_SIZE		(12)
+
+#define QED_IWARP_TS_EN			BIT(0)
+#define QED_IWARP_DA_EN			BIT(1)
+#define QED_IWARP_PARAM_CRC_NEEDED	(1)
+#define QED_IWARP_PARAM_P2P		(1)
+
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+				 u8 fw_event_code, u16 echo,
+				 union event_ring_data *data,
+				 u8 fw_return_code);
+
+/* Override devinfo with iWARP specific values */
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn)
+{
+	struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+	dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE;
+	dev->max_qp = min_t(u32,
+			    IWARP_MAX_QPS,
+			    p_hwfn->p_rdma_info->num_qps) -
+		      QED_IWARP_PREALLOC_CNT;
+
+	dev->max_cq = dev->max_qp;
+
+	dev->max_qp_resp_rd_atomic_resc = QED_IWARP_IRD_DEFAULT;
+	dev->max_qp_req_rd_atomic_resc = QED_IWARP_ORD_DEFAULT;
+}
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP;
+	qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
+	p_hwfn->b_rdma_enabled_in_prs = true;
+}
+
+/* We have two cid maps, one for tcp which should be used only from passive
+ * syn processing and replacing a pre-allocated ep in the list. The second
+ * for active tcp and for QPs.
+ */
+static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+	if (cid < QED_IWARP_PREALLOC_CNT)
+		qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+				    cid);
+	else
+		qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed in allocating iwarp cid\n");
+		return rc;
+	}
+	*cid += qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *cid);
+	if (rc)
+		qed_iwarp_cid_cleaned(p_hwfn, *cid);
+
+	return rc;
+}
+
+static void qed_iwarp_set_tcp_cid(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+/* This function allocates a cid for passive tcp (called from syn receive)
+ * the reason it's separate from the regular cid allocation is because it
+ * is assured that these cids already have ilt allocated. They are preallocated
+ * to ensure that we won't need to allocate memory during syn processing
+ */
+static int qed_iwarp_alloc_tcp_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+	rc = qed_rdma_bmap_alloc_id(p_hwfn,
+				    &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "can't allocate iwarp tcp cid max-count=%d\n",
+			   p_hwfn->p_rdma_info->tcp_cid_map.max_count);
+
+		*cid = QED_IWARP_INVALID_TCP_CID;
+		return rc;
+	}
+
+	*cid += qed_cxt_get_proto_cid_start(p_hwfn,
+					    p_hwfn->p_rdma_info->proto);
+	return 0;
+}
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+			struct qed_rdma_qp *qp,
+			struct qed_rdma_create_qp_out_params *out_params)
+{
+	struct iwarp_create_qp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	u16 physical_queue;
+	u32 cid;
+	int rc;
+
+	qp->shared_queue = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					      IWARP_SHARED_QUEUE_PAGE_SIZE,
+					      &qp->shared_queue_phys_addr,
+					      GFP_KERNEL);
+	if (!qp->shared_queue)
+		return -ENOMEM;
+
+	out_params->sq_pbl_virt = (u8 *)qp->shared_queue +
+	    IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+	out_params->sq_pbl_phys = qp->shared_queue_phys_addr +
+	    IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+	out_params->rq_pbl_virt = (u8 *)qp->shared_queue +
+	    IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+	out_params->rq_pbl_phys = qp->shared_queue_phys_addr +
+	    IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+
+	rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+	if (rc)
+		goto err1;
+
+	qp->icid = (u16)cid;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.cid = qp->icid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_CREATE_QP,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		goto err2;
+
+	p_ramrod = &p_ent->ramrod.iwarp_create_qp;
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN,
+		  qp->fmr_and_reserved_lkey);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, qp->signal_all);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN,
+		  qp->incoming_rdma_read_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN,
+		  qp->incoming_rdma_write_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN,
+		  qp->incoming_atomic_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, qp->use_srq);
+
+	p_ramrod->pd = qp->pd;
+	p_ramrod->sq_num_pages = qp->sq_num_pages;
+	p_ramrod->rq_num_pages = qp->rq_num_pages;
+
+	p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
+	p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
+
+	p_ramrod->cq_cid_for_sq =
+	    cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id);
+	p_ramrod->cq_cid_for_rq =
+	    cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id);
+
+	p_ramrod->dpi = cpu_to_le16(qp->dpi);
+
+	physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+	p_ramrod->physical_q0 = cpu_to_le16(physical_queue);
+	physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+	p_ramrod->physical_q1 = cpu_to_le16(physical_queue);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err2;
+
+	return rc;
+
+err2:
+	qed_iwarp_cid_cleaned(p_hwfn, cid);
+err1:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  IWARP_SHARED_QUEUE_PAGE_SIZE,
+			  qp->shared_queue, qp->shared_queue_phys_addr);
+
+	return rc;
+}
+
+static int qed_iwarp_modify_fw(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	struct iwarp_modify_qp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	int rc;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MODIFY_QP,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iwarp_modify_qp;
+	SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN,
+		  0x1);
+	if (qp->iwarp_state == QED_IWARP_QP_STATE_CLOSING)
+		p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING;
+	else
+		p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x)rc=%d\n", qp->icid, rc);
+
+	return rc;
+}
+
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state)
+{
+	switch (state) {
+	case QED_ROCE_QP_STATE_RESET:
+	case QED_ROCE_QP_STATE_INIT:
+	case QED_ROCE_QP_STATE_RTR:
+		return QED_IWARP_QP_STATE_IDLE;
+	case QED_ROCE_QP_STATE_RTS:
+		return QED_IWARP_QP_STATE_RTS;
+	case QED_ROCE_QP_STATE_SQD:
+		return QED_IWARP_QP_STATE_CLOSING;
+	case QED_ROCE_QP_STATE_ERR:
+		return QED_IWARP_QP_STATE_ERROR;
+	case QED_ROCE_QP_STATE_SQE:
+		return QED_IWARP_QP_STATE_TERMINATE;
+	default:
+		return QED_IWARP_QP_STATE_ERROR;
+	}
+}
+
+static enum qed_roce_qp_state
+qed_iwarp2roce_state(enum qed_iwarp_qp_state state)
+{
+	switch (state) {
+	case QED_IWARP_QP_STATE_IDLE:
+		return QED_ROCE_QP_STATE_INIT;
+	case QED_IWARP_QP_STATE_RTS:
+		return QED_ROCE_QP_STATE_RTS;
+	case QED_IWARP_QP_STATE_TERMINATE:
+		return QED_ROCE_QP_STATE_SQE;
+	case QED_IWARP_QP_STATE_CLOSING:
+		return QED_ROCE_QP_STATE_SQD;
+	case QED_IWARP_QP_STATE_ERROR:
+		return QED_ROCE_QP_STATE_ERR;
+	default:
+		return QED_ROCE_QP_STATE_ERR;
+	}
+}
+
+const char *iwarp_state_names[] = {
+	"IDLE",
+	"RTS",
+	"TERMINATE",
+	"CLOSING",
+	"ERROR",
+};
+
+int
+qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn,
+		    struct qed_rdma_qp *qp,
+		    enum qed_iwarp_qp_state new_state, bool internal)
+{
+	enum qed_iwarp_qp_state prev_iw_state;
+	bool modify_fw = false;
+	int rc = 0;
+
+	/* modify QP can be called from upper-layer or as a result of async
+	 * RST/FIN... therefore need to protect
+	 */
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+	prev_iw_state = qp->iwarp_state;
+
+	if (prev_iw_state == new_state) {
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+		return 0;
+	}
+
+	switch (prev_iw_state) {
+	case QED_IWARP_QP_STATE_IDLE:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_RTS:
+			qp->iwarp_state = QED_IWARP_QP_STATE_RTS;
+			break;
+		case QED_IWARP_QP_STATE_ERROR:
+			qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+			if (!internal)
+				modify_fw = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	case QED_IWARP_QP_STATE_RTS:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_CLOSING:
+			if (!internal)
+				modify_fw = true;
+
+			qp->iwarp_state = QED_IWARP_QP_STATE_CLOSING;
+			break;
+		case QED_IWARP_QP_STATE_ERROR:
+			if (!internal)
+				modify_fw = true;
+			qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+			break;
+		default:
+			break;
+		}
+		break;
+	case QED_IWARP_QP_STATE_ERROR:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_IDLE:
+
+			qp->iwarp_state = new_state;
+			break;
+		case QED_IWARP_QP_STATE_CLOSING:
+			/* could happen due to race... do nothing.... */
+			break;
+		default:
+			rc = -EINVAL;
+		}
+		break;
+	case QED_IWARP_QP_STATE_TERMINATE:
+	case QED_IWARP_QP_STATE_CLOSING:
+		qp->iwarp_state = new_state;
+		break;
+	default:
+		break;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) %s --> %s%s\n",
+		   qp->icid,
+		   iwarp_state_names[prev_iw_state],
+		   iwarp_state_names[qp->iwarp_state],
+		   internal ? "internal" : "");
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+
+	if (modify_fw)
+		rc = qed_iwarp_modify_fw(p_hwfn, qp);
+
+	return rc;
+}
+
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	int rc;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_DESTROY_QP,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		return rc;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc);
+
+	return rc;
+}
+
+static void qed_iwarp_destroy_ep(struct qed_hwfn *p_hwfn,
+				 struct qed_iwarp_ep *ep,
+				 bool remove_from_active_list)
+{
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(*ep->ep_buffer_virt),
+			  ep->ep_buffer_virt, ep->ep_buffer_phys);
+
+	if (remove_from_active_list) {
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_del(&ep->list_entry);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+
+	if (ep->qp)
+		ep->qp->ep = NULL;
+
+	kfree(ep);
+}
+
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	struct qed_iwarp_ep *ep = qp->ep;
+	int wait_count = 0;
+	int rc = 0;
+
+	if (qp->iwarp_state != QED_IWARP_QP_STATE_ERROR) {
+		rc = qed_iwarp_modify_qp(p_hwfn, qp,
+					 QED_IWARP_QP_STATE_ERROR, false);
+		if (rc)
+			return rc;
+	}
+
+	/* Make sure ep is closed before returning and freeing memory. */
+	if (ep) {
+		while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+			msleep(100);
+
+		if (ep->state != QED_IWARP_EP_CLOSED)
+			DP_NOTICE(p_hwfn, "ep state close timeout state=%x\n",
+				  ep->state);
+
+		qed_iwarp_destroy_ep(p_hwfn, ep, false);
+	}
+
+	rc = qed_iwarp_fw_destroy(p_hwfn, qp);
+
+	if (qp->shared_queue)
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+				  IWARP_SHARED_QUEUE_PAGE_SIZE,
+				  qp->shared_queue, qp->shared_queue_phys_addr);
+
+	return rc;
+}
+
+static int
+qed_iwarp_create_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep **ep_out)
+{
+	struct qed_iwarp_ep *ep;
+	int rc;
+
+	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	ep->state = QED_IWARP_EP_INIT;
+
+	ep->ep_buffer_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+						sizeof(*ep->ep_buffer_virt),
+						&ep->ep_buffer_phys,
+						GFP_KERNEL);
+	if (!ep->ep_buffer_virt) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	ep->sig = QED_EP_SIG;
+
+	*ep_out = ep;
+
+	return 0;
+
+err:
+	kfree(ep);
+	return rc;
+}
+
+static void
+qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
+			   struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "local_mac=%x %x %x, remote_mac=%x %x %x\n",
+		   p_tcp_ramrod->tcp.local_mac_addr_lo,
+		   p_tcp_ramrod->tcp.local_mac_addr_mid,
+		   p_tcp_ramrod->tcp.local_mac_addr_hi,
+		   p_tcp_ramrod->tcp.remote_mac_addr_lo,
+		   p_tcp_ramrod->tcp.remote_mac_addr_mid,
+		   p_tcp_ramrod->tcp.remote_mac_addr_hi);
+
+	if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "local_ip=%pI4h:%x, remote_ip=%pI4h%x, vlan=%x\n",
+			   p_tcp_ramrod->tcp.local_ip,
+			   p_tcp_ramrod->tcp.local_port,
+			   p_tcp_ramrod->tcp.remote_ip,
+			   p_tcp_ramrod->tcp.remote_port,
+			   p_tcp_ramrod->tcp.vlan_id);
+	} else {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "local_ip=%pI6h:%x, remote_ip=%pI6h:%x, vlan=%x\n",
+			   p_tcp_ramrod->tcp.local_ip,
+			   p_tcp_ramrod->tcp.local_port,
+			   p_tcp_ramrod->tcp.remote_ip,
+			   p_tcp_ramrod->tcp.remote_port,
+			   p_tcp_ramrod->tcp.vlan_id);
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "flow_label=%x, ttl=%x, tos_or_tc=%x, mss=%x, rcv_wnd_scale=%x, connect_mode=%x, flags=%x\n",
+		   p_tcp_ramrod->tcp.flow_label,
+		   p_tcp_ramrod->tcp.ttl,
+		   p_tcp_ramrod->tcp.tos_or_tc,
+		   p_tcp_ramrod->tcp.mss,
+		   p_tcp_ramrod->tcp.rcv_wnd_scale,
+		   p_tcp_ramrod->tcp.connect_mode,
+		   p_tcp_ramrod->tcp.flags);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "syn_ip_payload_length=%x, lo=%x, hi=%x\n",
+		   p_tcp_ramrod->tcp.syn_ip_payload_length,
+		   p_tcp_ramrod->tcp.syn_phy_addr_lo,
+		   p_tcp_ramrod->tcp.syn_phy_addr_hi);
+}
+
+static int
+qed_iwarp_tcp_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod;
+	struct tcp_offload_params_opt2 *tcp;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t async_output_phys;
+	dma_addr_t in_pdata_phys;
+	u16 physical_q;
+	u8 tcp_flags;
+	int rc;
+	int i;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = ep->tcp_cid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE)
+		init_data.comp_mode = QED_SPQ_MODE_CB;
+	else
+		init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_TCP_OFFLOAD,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		return rc;
+
+	p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload;
+
+	in_pdata_phys = ep->ep_buffer_phys +
+			offsetof(struct qed_iwarp_ep_memory, in_pdata);
+	DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr,
+		       in_pdata_phys);
+
+	p_tcp_ramrod->iwarp.incoming_ulp_buffer.len =
+	    cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+
+	async_output_phys = ep->ep_buffer_phys +
+			    offsetof(struct qed_iwarp_ep_memory, async_output);
+	DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.async_eqe_output_buf,
+		       async_output_phys);
+
+	p_tcp_ramrod->iwarp.handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+	p_tcp_ramrod->iwarp.handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+	p_tcp_ramrod->iwarp.physical_q0 = cpu_to_le16(physical_q);
+	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+	p_tcp_ramrod->iwarp.physical_q1 = cpu_to_le16(physical_q);
+	p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev;
+
+	tcp = &p_tcp_ramrod->tcp;
+	qed_set_fw_mac_addr(&tcp->remote_mac_addr_hi,
+			    &tcp->remote_mac_addr_mid,
+			    &tcp->remote_mac_addr_lo, ep->remote_mac_addr);
+	qed_set_fw_mac_addr(&tcp->local_mac_addr_hi, &tcp->local_mac_addr_mid,
+			    &tcp->local_mac_addr_lo, ep->local_mac_addr);
+
+	tcp->vlan_id = cpu_to_le16(ep->cm_info.vlan);
+
+	tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags;
+	tcp->flags = 0;
+	SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_TS_EN,
+		  !!(tcp_flags & QED_IWARP_TS_EN));
+
+	SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_DA_EN,
+		  !!(tcp_flags & QED_IWARP_DA_EN));
+
+	tcp->ip_version = ep->cm_info.ip_version;
+
+	for (i = 0; i < 4; i++) {
+		tcp->remote_ip[i] = cpu_to_le32(ep->cm_info.remote_ip[i]);
+		tcp->local_ip[i] = cpu_to_le32(ep->cm_info.local_ip[i]);
+	}
+
+	tcp->remote_port = cpu_to_le16(ep->cm_info.remote_port);
+	tcp->local_port = cpu_to_le16(ep->cm_info.local_port);
+	tcp->mss = cpu_to_le16(ep->mss);
+	tcp->flow_label = 0;
+	tcp->ttl = 0x40;
+	tcp->tos_or_tc = 0;
+
+	tcp->rcv_wnd_scale = (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale;
+	tcp->connect_mode = ep->connect_mode;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		tcp->syn_ip_payload_length =
+			cpu_to_le16(ep->syn_ip_payload_length);
+		tcp->syn_phy_addr_hi = DMA_HI_LE(ep->syn_phy_addr);
+		tcp->syn_phy_addr_lo = DMA_LO_LE(ep->syn_phy_addr);
+	}
+
+	qed_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "EP(0x%x) Offload completed rc=%d\n", ep->tcp_cid, rc);
+
+	return rc;
+}
+
+static void
+qed_iwarp_mpa_received(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_iwarp_cm_event_params params;
+	struct mpa_v2_hdr *mpa_v2;
+	union async_output *async_data;
+	u16 mpa_ord, mpa_ird;
+	u8 mpa_hdr_size = 0;
+	u8 mpa_rev;
+
+	async_data = &ep->ep_buffer_virt->async_output;
+
+	mpa_rev = async_data->mpa_request.mpa_handshake_mode;
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "private_data_len=%x handshake_mode=%x private_data=(%x)\n",
+		   async_data->mpa_request.ulp_data_len,
+		   mpa_rev, *((u32 *)(ep->ep_buffer_virt->in_pdata)));
+
+	if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+		/* Read ord/ird values from private data buffer */
+		mpa_v2 = (struct mpa_v2_hdr *)ep->ep_buffer_virt->in_pdata;
+		mpa_hdr_size = sizeof(*mpa_v2);
+
+		mpa_ord = ntohs(mpa_v2->ord);
+		mpa_ird = ntohs(mpa_v2->ird);
+
+		/* Temprary store in cm_info incoming ord/ird requested, later
+		 * replace with negotiated value during accept
+		 */
+		ep->cm_info.ord = (u8)min_t(u16,
+					    (mpa_ord & MPA_V2_IRD_ORD_MASK),
+					    QED_IWARP_ORD_DEFAULT);
+
+		ep->cm_info.ird = (u8)min_t(u16,
+					    (mpa_ird & MPA_V2_IRD_ORD_MASK),
+					    QED_IWARP_IRD_DEFAULT);
+
+		/* Peer2Peer negotiation */
+		ep->rtr_type = MPA_RTR_TYPE_NONE;
+		if (mpa_ird & MPA_V2_PEER2PEER_MODEL) {
+			if (mpa_ord & MPA_V2_WRITE_RTR)
+				ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE;
+
+			if (mpa_ord & MPA_V2_READ_RTR)
+				ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ;
+
+			if (mpa_ird & MPA_V2_SEND_RTR)
+				ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND;
+
+			ep->rtr_type &= iwarp_info->rtr_type;
+
+			/* if we're left with no match send our capabilities */
+			if (ep->rtr_type == MPA_RTR_TYPE_NONE)
+				ep->rtr_type = iwarp_info->rtr_type;
+		}
+
+		ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+	} else {
+		ep->cm_info.ord = QED_IWARP_ORD_DEFAULT;
+		ep->cm_info.ird = QED_IWARP_IRD_DEFAULT;
+		ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n",
+		   mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type,
+		   async_data->mpa_request.ulp_data_len, mpa_hdr_size);
+
+	/* Strip mpa v2 hdr from private data before sending to upper layer */
+	ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_hdr_size;
+
+	ep->cm_info.private_data_len = async_data->mpa_request.ulp_data_len -
+				       mpa_hdr_size;
+
+	params.event = QED_IWARP_EVENT_MPA_REQUEST;
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+	params.status = 0;
+
+	ep->state = QED_IWARP_EP_MPA_REQ_RCVD;
+	ep->event_cb(ep->cb_context, &params);
+}
+
+static int
+qed_iwarp_mpa_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod;
+	struct qed_sp_init_data init_data;
+	dma_addr_t async_output_phys;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t out_pdata_phys;
+	dma_addr_t in_pdata_phys;
+	struct qed_rdma_qp *qp;
+	bool reject;
+	int rc;
+
+	if (!ep)
+		return -EINVAL;
+
+	qp = ep->qp;
+	reject = !qp;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = reject ? ep->tcp_cid : qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+
+	if (ep->connect_mode == TCP_CONNECT_ACTIVE)
+		init_data.comp_mode = QED_SPQ_MODE_CB;
+	else
+		init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		return rc;
+
+	p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload;
+	out_pdata_phys = ep->ep_buffer_phys +
+			 offsetof(struct qed_iwarp_ep_memory, out_pdata);
+	DMA_REGPAIR_LE(p_mpa_ramrod->common.outgoing_ulp_buffer.addr,
+		       out_pdata_phys);
+	p_mpa_ramrod->common.outgoing_ulp_buffer.len =
+	    ep->cm_info.private_data_len;
+	p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed;
+
+	p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord;
+	p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird;
+
+	p_mpa_ramrod->tcp_cid = p_hwfn->hw_info.opaque_fid << 16 | ep->tcp_cid;
+
+	in_pdata_phys = ep->ep_buffer_phys +
+			offsetof(struct qed_iwarp_ep_memory, in_pdata);
+	p_mpa_ramrod->tcp_connect_side = ep->connect_mode;
+	DMA_REGPAIR_LE(p_mpa_ramrod->incoming_ulp_buffer.addr,
+		       in_pdata_phys);
+	p_mpa_ramrod->incoming_ulp_buffer.len =
+	    cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+	async_output_phys = ep->ep_buffer_phys +
+			    offsetof(struct qed_iwarp_ep_memory, async_output);
+	DMA_REGPAIR_LE(p_mpa_ramrod->async_eqe_output_buf,
+		       async_output_phys);
+	p_mpa_ramrod->handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+	p_mpa_ramrod->handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+	if (!reject) {
+		DMA_REGPAIR_LE(p_mpa_ramrod->shared_queue_addr,
+			       qp->shared_queue_phys_addr);
+		p_mpa_ramrod->stats_counter_id =
+		    RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + qp->stats_queue;
+	} else {
+		p_mpa_ramrod->common.reject = 1;
+	}
+
+	p_mpa_ramrod->mode = ep->mpa_rev;
+	SET_FIELD(p_mpa_ramrod->rtr_pref,
+		  IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, ep->rtr_type);
+
+	ep->state = QED_IWARP_EP_MPA_OFFLOADED;
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (!reject)
+		ep->cid = qp->icid;	/* Now they're migrated. */
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n",
+		   reject ? 0xffff : qp->icid,
+		   ep->tcp_cid,
+		   rc,
+		   ep->cm_info.ird,
+		   ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject);
+	return rc;
+}
+
+static void
+qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	ep->state = QED_IWARP_EP_INIT;
+	if (ep->qp)
+		ep->qp->ep = NULL;
+	ep->qp = NULL;
+	memset(&ep->cm_info, 0, sizeof(ep->cm_info));
+
+	if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) {
+		/* We don't care about the return code, it's ok if tcp_cid
+		 * remains invalid...in this case we'll defer allocation
+		 */
+		qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid);
+	}
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	list_del(&ep->list_entry);
+	list_add_tail(&ep->list_entry,
+		      &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+}
+
+void
+qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct mpa_v2_hdr *mpa_v2_params;
+	union async_output *async_data;
+	u16 mpa_ird, mpa_ord;
+	u8 mpa_data_size = 0;
+
+	if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) {
+		mpa_v2_params =
+			(struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata);
+		mpa_data_size = sizeof(*mpa_v2_params);
+		mpa_ird = ntohs(mpa_v2_params->ird);
+		mpa_ord = ntohs(mpa_v2_params->ord);
+
+		ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK);
+		ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK);
+	}
+	async_data = &ep->ep_buffer_virt->async_output;
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size;
+	ep->cm_info.private_data_len = async_data->mpa_response.ulp_data_len -
+				       mpa_data_size;
+}
+
+void
+qed_iwarp_mpa_reply_arrived(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		DP_NOTICE(p_hwfn,
+			  "MPA reply event not expected on passive side!\n");
+		return;
+	}
+
+	params.event = QED_IWARP_EVENT_ACTIVE_MPA_REPLY;
+
+	qed_iwarp_parse_private_data(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+		   ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+	params.status = 0;
+
+	ep->mpa_reply_processed = true;
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
+#define QED_IWARP_CONNECT_MODE_STRING(ep) \
+	((ep)->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active"
+
+/* Called as a result of the event:
+ * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE
+ */
+static void
+qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	if (ep->connect_mode == TCP_CONNECT_ACTIVE)
+		params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+	else
+		params.event = QED_IWARP_EVENT_PASSIVE_COMPLETE;
+
+	if (ep->connect_mode == TCP_CONNECT_ACTIVE && !ep->mpa_reply_processed)
+		qed_iwarp_parse_private_data(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+		   ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+	params.cm_info = &ep->cm_info;
+
+	params.ep_context = ep;
+
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	switch (fw_return_code) {
+	case RDMA_RETURN_OK:
+		ep->qp->max_rd_atomic_req = ep->cm_info.ord;
+		ep->qp->max_rd_atomic_resp = ep->cm_info.ird;
+		qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_RTS, 1);
+		ep->state = QED_IWARP_EP_ESTABLISHED;
+		params.status = 0;
+		break;
+	case IWARP_CONN_ERROR_MPA_TIMEOUT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA timeout\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -EBUSY;
+		break;
+	case IWARP_CONN_ERROR_MPA_ERROR_REJECT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Reject\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_RST:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid,
+			  ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_MPA_FIN:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA received FIN\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INSUF_IRD:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA insufficient ird\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_RTR_MISMATCH:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA RTR MISMATCH\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_LOCAL_ERROR:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Local Error\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_TERMINATE:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA TERMINATE\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	default:
+		params.status = -ECONNRESET;
+		break;
+	}
+
+	ep->event_cb(ep->cb_context, &params);
+
+	/* on passive side, if there is no associated QP (REJECT) we need to
+	 * return the ep to the pool, (in the regular case we add an element
+	 * in accept instead of this one.
+	 * In both cases we need to remove it from the ep_list.
+	 */
+	if (fw_return_code != RDMA_RETURN_OK) {
+		ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+		if ((ep->connect_mode == TCP_CONNECT_PASSIVE) &&
+		    (!ep->qp)) {	/* Rejected */
+			qed_iwarp_return_ep(p_hwfn, ep);
+		} else {
+			spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+			list_del(&ep->list_entry);
+			spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		}
+	}
+}
+
+static void
+qed_iwarp_mpa_v2_set_private(struct qed_hwfn *p_hwfn,
+			     struct qed_iwarp_ep *ep, u8 *mpa_data_size)
+{
+	struct mpa_v2_hdr *mpa_v2_params;
+	u16 mpa_ird, mpa_ord;
+
+	*mpa_data_size = 0;
+	if (MPA_REV2(ep->mpa_rev)) {
+		mpa_v2_params =
+		    (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata;
+		*mpa_data_size = sizeof(*mpa_v2_params);
+
+		mpa_ird = (u16)ep->cm_info.ird;
+		mpa_ord = (u16)ep->cm_info.ord;
+
+		if (ep->rtr_type != MPA_RTR_TYPE_NONE) {
+			mpa_ird |= MPA_V2_PEER2PEER_MODEL;
+
+			if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND)
+				mpa_ird |= MPA_V2_SEND_RTR;
+
+			if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE)
+				mpa_ord |= MPA_V2_WRITE_RTR;
+
+			if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ)
+				mpa_ord |= MPA_V2_READ_RTR;
+		}
+
+		mpa_v2_params->ird = htons(mpa_ird);
+		mpa_v2_params->ord = htons(mpa_ord);
+
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n",
+			   mpa_v2_params->ird,
+			   mpa_v2_params->ord,
+			   *((u32 *)mpa_v2_params),
+			   mpa_ord & MPA_V2_IRD_ORD_MASK,
+			   mpa_ird & MPA_V2_IRD_ORD_MASK,
+			   !!(mpa_ird & MPA_V2_PEER2PEER_MODEL),
+			   !!(mpa_ird & MPA_V2_SEND_RTR),
+			   !!(mpa_ord & MPA_V2_WRITE_RTR),
+			   !!(mpa_ord & MPA_V2_READ_RTR));
+	}
+}
+
+int qed_iwarp_connect(void *rdma_cxt,
+		      struct qed_iwarp_connect_in *iparams,
+		      struct qed_iwarp_connect_out *oparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_info *iwarp_info;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+	u8 ts_hdr_size = 0;
+	u32 cid;
+	int rc;
+
+	if ((iparams->cm_info.ord > QED_IWARP_ORD_DEFAULT) ||
+	    (iparams->cm_info.ird > QED_IWARP_IRD_DEFAULT)) {
+		DP_NOTICE(p_hwfn,
+			  "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+			  iparams->qp->icid, iparams->cm_info.ord,
+			  iparams->cm_info.ird);
+
+		return -EINVAL;
+	}
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	/* Allocate ep object */
+	rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+	if (rc)
+		return rc;
+
+	rc = qed_iwarp_create_ep(p_hwfn, &ep);
+	if (rc)
+		goto err;
+
+	ep->tcp_cid = cid;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ep->qp = iparams->qp;
+	ep->qp->ep = ep;
+	ether_addr_copy(ep->remote_mac_addr, iparams->remote_mac_addr);
+	ether_addr_copy(ep->local_mac_addr, iparams->local_mac_addr);
+	memcpy(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info));
+
+	ep->cm_info.ord = iparams->cm_info.ord;
+	ep->cm_info.ird = iparams->cm_info.ird;
+
+	ep->rtr_type = iwarp_info->rtr_type;
+	if (!iwarp_info->peer2peer)
+		ep->rtr_type = MPA_RTR_TYPE_NONE;
+
+	if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && (ep->cm_info.ord == 0))
+		ep->cm_info.ord = 1;
+
+	ep->mpa_rev = iwarp_info->mpa_rev;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->cm_info.private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->cm_info.private_data,
+	       iparams->cm_info.private_data_len);
+
+	if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN)
+		ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+	ep->mss = iparams->mss - ts_hdr_size;
+	ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+	ep->event_cb = iparams->event_cb;
+	ep->cb_context = iparams->cb_context;
+	ep->connect_mode = TCP_CONNECT_ACTIVE;
+
+	oparams->ep_context = ep;
+
+	rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n",
+		   iparams->qp->icid, ep->tcp_cid, rc);
+
+	if (rc) {
+		qed_iwarp_destroy_ep(p_hwfn, ep, true);
+		goto err;
+	}
+
+	return rc;
+err:
+	qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+	return rc;
+}
+
+static struct qed_iwarp_ep *qed_iwarp_get_free_ep(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_ep *ep = NULL;
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	if (list_empty(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) {
+		DP_ERR(p_hwfn, "Ep list is empty\n");
+		goto out;
+	}
+
+	ep = list_first_entry(&p_hwfn->p_rdma_info->iwarp.ep_free_list,
+			      struct qed_iwarp_ep, list_entry);
+
+	/* in some cases we could have failed allocating a tcp cid when added
+	 * from accept / failure... retry now..this is not the common case.
+	 */
+	if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) {
+		rc = qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid);
+
+		/* if we fail we could look for another entry with a valid
+		 * tcp_cid, but since we don't expect to reach this anyway
+		 * it's not worth the handling
+		 */
+		if (rc) {
+			ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+			ep = NULL;
+			goto out;
+		}
+	}
+
+	list_del(&ep->list_entry);
+
+out:
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	return ep;
+}
+
+#define QED_IWARP_MAX_CID_CLEAN_TIME  100
+#define QED_IWARP_MAX_NO_PROGRESS_CNT 5
+
+/* This function waits for all the bits of a bmap to be cleared, as long as
+ * there is progress ( i.e. the number of bits left to be cleared decreases )
+ * the function continues.
+ */
+static int
+qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap)
+{
+	int prev_weight = 0;
+	int wait_count = 0;
+	int weight = 0;
+
+	weight = bitmap_weight(bmap->bitmap, bmap->max_count);
+	prev_weight = weight;
+
+	while (weight) {
+		msleep(QED_IWARP_MAX_CID_CLEAN_TIME);
+
+		weight = bitmap_weight(bmap->bitmap, bmap->max_count);
+
+		if (prev_weight == weight) {
+			wait_count++;
+		} else {
+			prev_weight = weight;
+			wait_count = 0;
+		}
+
+		if (wait_count > QED_IWARP_MAX_NO_PROGRESS_CNT) {
+			DP_NOTICE(p_hwfn,
+				  "%s bitmap wait timed out (%d cids pending)\n",
+				  bmap->name, weight);
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+static int qed_iwarp_wait_for_all_cids(struct qed_hwfn *p_hwfn)
+{
+	int rc;
+	int i;
+
+	rc = qed_iwarp_wait_cid_map_cleared(p_hwfn,
+					    &p_hwfn->p_rdma_info->tcp_cid_map);
+	if (rc)
+		return rc;
+
+	/* Now free the tcp cids from the main cid map */
+	for (i = 0; i < QED_IWARP_PREALLOC_CNT; i++)
+		qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, i);
+
+	/* Now wait for all cids to be completed */
+	return qed_iwarp_wait_cid_map_cleared(p_hwfn,
+					      &p_hwfn->p_rdma_info->cid_map);
+}
+
+static void qed_iwarp_free_prealloc_ep(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_ep *ep;
+
+	while (!list_empty(&p_hwfn->p_rdma_info->iwarp.ep_free_list)) {
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+		ep = list_first_entry(&p_hwfn->p_rdma_info->iwarp.ep_free_list,
+				      struct qed_iwarp_ep, list_entry);
+
+		if (!ep) {
+			spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+			break;
+		}
+		list_del(&ep->list_entry);
+
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+		if (ep->tcp_cid != QED_IWARP_INVALID_TCP_CID)
+			qed_iwarp_cid_cleaned(p_hwfn, ep->tcp_cid);
+
+		qed_iwarp_destroy_ep(p_hwfn, ep, false);
+	}
+}
+
+static int qed_iwarp_prealloc_ep(struct qed_hwfn *p_hwfn, bool init)
+{
+	struct qed_iwarp_ep *ep;
+	int rc = 0;
+	int count;
+	u32 cid;
+	int i;
+
+	count = init ? QED_IWARP_PREALLOC_CNT : 1;
+	for (i = 0; i < count; i++) {
+		rc = qed_iwarp_create_ep(p_hwfn, &ep);
+		if (rc)
+			return rc;
+
+		/* During initialization we allocate from the main pool,
+		 * afterwards we allocate only from the tcp_cid.
+		 */
+		if (init) {
+			rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+			if (rc)
+				goto err;
+			qed_iwarp_set_tcp_cid(p_hwfn, cid);
+		} else {
+			/* We don't care about the return code, it's ok if
+			 * tcp_cid remains invalid...in this case we'll
+			 * defer allocation
+			 */
+			qed_iwarp_alloc_tcp_cid(p_hwfn, &cid);
+		}
+
+		ep->tcp_cid = cid;
+
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_add_tail(&ep->list_entry,
+			      &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+
+	return rc;
+
+err:
+	qed_iwarp_destroy_ep(p_hwfn, ep, false);
+
+	return rc;
+}
+
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
+{
+	int rc;
+
+	/* Allocate bitmap for tcp cid. These are used by passive side
+	 * to ensure it can allocate a tcp cid during dpc that was
+	 * pre-acquired and doesn't require dynamic allocation of ilt
+	 */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+				 QED_IWARP_PREALLOC_CNT, "TCP_CID");
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate tcp cid, rc = %d\n", rc);
+		return rc;
+	}
+
+	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list);
+	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	return qed_iwarp_prealloc_ep(p_hwfn, true);
+}
+
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
+{
+	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1);
+}
+
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+	int rc;
+
+	ep = iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+		   iparams->qp->icid, ep->tcp_cid);
+
+	if ((iparams->ord > QED_IWARP_ORD_DEFAULT) ||
+	    (iparams->ird > QED_IWARP_IRD_DEFAULT)) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+			   iparams->qp->icid,
+			   ep->tcp_cid, iparams->ord, iparams->ord);
+		return -EINVAL;
+	}
+
+	qed_iwarp_prealloc_ep(p_hwfn, false);
+
+	ep->cb_context = iparams->cb_context;
+	ep->qp = iparams->qp;
+	ep->qp->ep = ep;
+
+	if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+		/* Negotiate ord/ird: if upperlayer requested ord larger than
+		 * ird advertised by remote, we need to decrease our ord
+		 */
+		if (iparams->ord > ep->cm_info.ird)
+			iparams->ord = ep->cm_info.ird;
+
+		if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) &&
+		    (iparams->ird == 0))
+			iparams->ird = 1;
+	}
+
+	/* Update cm_info ord/ird to be negotiated values */
+	ep->cm_info.ord = iparams->ord;
+	ep->cm_info.ird = iparams->ird;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->private_data, iparams->private_data_len);
+
+	rc = qed_iwarp_mpa_offload(p_hwfn, ep);
+	if (rc)
+		qed_iwarp_modify_qp(p_hwfn,
+				    iparams->qp, QED_IWARP_QP_STATE_ERROR, 1);
+
+	return rc;
+}
+
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+
+	ep = iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid);
+
+	ep->cb_context = iparams->cb_context;
+	ep->qp = NULL;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->private_data, iparams->private_data_len);
+
+	return qed_iwarp_mpa_offload(p_hwfn, ep);
+}
+
+static void
+qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn,
+			struct qed_iwarp_cm_info *cm_info)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "ip_version = %d\n",
+		   cm_info->ip_version);
+
+	if (cm_info->ip_version == QED_TCP_IPV4)
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "remote_ip %pI4h:%x, local_ip %pI4h:%x vlan=%x\n",
+			   cm_info->remote_ip, cm_info->remote_port,
+			   cm_info->local_ip, cm_info->local_port,
+			   cm_info->vlan);
+	else
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n",
+			   cm_info->remote_ip, cm_info->remote_port,
+			   cm_info->local_ip, cm_info->local_port,
+			   cm_info->vlan);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "private_data_len = %x ord = %d, ird = %d\n",
+		   cm_info->private_data_len, cm_info->ord, cm_info->ird);
+}
+
+static int
+qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn,
+		      struct qed_iwarp_ll2_buff *buf, u8 handle)
+{
+	int rc;
+
+	rc = qed_ll2_post_rx_buffer(p_hwfn, handle, buf->data_phys_addr,
+				    (u16)buf->buff_size, buf, 1);
+	if (rc) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to repost rx buffer to ll2 rc = %d, handle=%d\n",
+			  rc, handle);
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev, buf->buff_size,
+				  buf->data, buf->data_phys_addr);
+		kfree(buf);
+	}
+
+	return rc;
+}
+
+static bool
+qed_iwarp_ep_exists(struct qed_hwfn *p_hwfn, struct qed_iwarp_cm_info *cm_info)
+{
+	struct qed_iwarp_ep *ep = NULL;
+	bool found = false;
+
+	list_for_each_entry(ep,
+			    &p_hwfn->p_rdma_info->iwarp.ep_list,
+			    list_entry) {
+		if ((ep->cm_info.local_port == cm_info->local_port) &&
+		    (ep->cm_info.remote_port == cm_info->remote_port) &&
+		    (ep->cm_info.vlan == cm_info->vlan) &&
+		    !memcmp(&ep->cm_info.local_ip, cm_info->local_ip,
+			    sizeof(cm_info->local_ip)) &&
+		    !memcmp(&ep->cm_info.remote_ip, cm_info->remote_ip,
+			    sizeof(cm_info->remote_ip))) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {
+		DP_NOTICE(p_hwfn,
+			  "SYN received on active connection - dropping\n");
+		qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+		return true;
+	}
+
+	return false;
+}
+
+static struct qed_iwarp_listener *
+qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_cm_info *cm_info)
+{
+	struct qed_iwarp_listener *listener = NULL;
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	bool found = false;
+
+	qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+	list_for_each_entry(listener,
+			    &p_hwfn->p_rdma_info->iwarp.listen_list,
+			    list_entry) {
+		if (listener->port == cm_info->local_port) {
+			if (!memcmp(listener->ip_addr,
+				    ip_zero, sizeof(ip_zero))) {
+				found = true;
+				break;
+			}
+
+			if (!memcmp(listener->ip_addr,
+				    cm_info->local_ip,
+				    sizeof(cm_info->local_ip)) &&
+			    (listener->vlan == cm_info->vlan)) {
+				found = true;
+				break;
+			}
+		}
+	}
+
+	if (found) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener found = %p\n",
+			   listener);
+		return listener;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener not found\n");
+	return NULL;
+}
+
+static int
+qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_cm_info *cm_info,
+		       void *buf,
+		       u8 *remote_mac_addr,
+		       u8 *local_mac_addr,
+		       int *payload_len, int *tcp_start_offset)
+{
+	struct vlan_ethhdr *vethh;
+	bool vlan_valid = false;
+	struct ipv6hdr *ip6h;
+	struct ethhdr *ethh;
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	int eth_hlen;
+	int ip_hlen;
+	int eth_type;
+	int i;
+
+	ethh = buf;
+	eth_type = ntohs(ethh->h_proto);
+	if (eth_type == ETH_P_8021Q) {
+		vlan_valid = true;
+		vethh = (struct vlan_ethhdr *)ethh;
+		cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK;
+		eth_type = ntohs(vethh->h_vlan_encapsulated_proto);
+	}
+
+	eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0);
+
+	ether_addr_copy(remote_mac_addr, ethh->h_source);
+	ether_addr_copy(local_mac_addr, ethh->h_dest);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_type =%d source mac: %pM\n",
+		   eth_type, ethh->h_source);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_hlen=%d destination mac: %pM\n",
+		   eth_hlen, ethh->h_dest);
+
+	iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
+
+	if (eth_type == ETH_P_IP) {
+		cm_info->local_ip[0] = ntohl(iph->daddr);
+		cm_info->remote_ip[0] = ntohl(iph->saddr);
+		cm_info->ip_version = TCP_IPV4;
+
+		ip_hlen = (iph->ihl) * sizeof(u32);
+		*payload_len = ntohs(iph->tot_len) - ip_hlen;
+	} else if (eth_type == ETH_P_IPV6) {
+		ip6h = (struct ipv6hdr *)iph;
+		for (i = 0; i < 4; i++) {
+			cm_info->local_ip[i] =
+			    ntohl(ip6h->daddr.in6_u.u6_addr32[i]);
+			cm_info->remote_ip[i] =
+			    ntohl(ip6h->saddr.in6_u.u6_addr32[i]);
+		}
+		cm_info->ip_version = TCP_IPV6;
+
+		ip_hlen = sizeof(*ip6h);
+		*payload_len = ntohs(ip6h->payload_len);
+	} else {
+		DP_NOTICE(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type);
+		return -EINVAL;
+	}
+
+	tcph = (struct tcphdr *)((u8 *)iph + ip_hlen);
+
+	if (!tcph->syn) {
+		DP_NOTICE(p_hwfn,
+			  "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n",
+			  iph->ihl, tcph->source, tcph->dest);
+		return -EINVAL;
+	}
+
+	cm_info->local_port = ntohs(tcph->dest);
+	cm_info->remote_port = ntohs(tcph->source);
+
+	qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+	*tcp_start_offset = eth_hlen + ip_hlen;
+
+	return 0;
+}
+
+static void
+qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
+{
+	struct qed_iwarp_ll2_buff *buf = data->cookie;
+	struct qed_iwarp_listener *listener;
+	struct qed_ll2_tx_pkt_info tx_pkt;
+	struct qed_iwarp_cm_info cm_info;
+	struct qed_hwfn *p_hwfn = cxt;
+	u8 remote_mac_addr[ETH_ALEN];
+	u8 local_mac_addr[ETH_ALEN];
+	struct qed_iwarp_ep *ep;
+	int tcp_start_offset;
+	u8 ts_hdr_size = 0;
+	u8 ll2_syn_handle;
+	int payload_len;
+	u32 hdr_size;
+	int rc;
+
+	memset(&cm_info, 0, sizeof(cm_info));
+
+	if (GET_FIELD(data->parse_flags,
+		      PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) &&
+	    GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) {
+		DP_NOTICE(p_hwfn, "Syn packet received with checksum error\n");
+		goto err;
+	}
+
+	rc = qed_iwarp_parse_rx_pkt(p_hwfn, &cm_info, (u8 *)(buf->data) +
+				    data->u.placement_offset, remote_mac_addr,
+				    local_mac_addr, &payload_len,
+				    &tcp_start_offset);
+	if (rc)
+		goto err;
+
+	/* Check if there is a listener for this 4-tuple+vlan */
+	ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+	listener = qed_iwarp_get_listener(p_hwfn, &cm_info);
+	if (!listener) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "SYN received on tuple not listened on parse_flags=%d packet len=%d\n",
+			   data->parse_flags, data->length.packet_length);
+
+		memset(&tx_pkt, 0, sizeof(tx_pkt));
+		tx_pkt.num_of_bds = 1;
+		tx_pkt.vlan = data->vlan;
+
+		if (GET_FIELD(data->parse_flags,
+			      PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
+			SET_FIELD(tx_pkt.bd_flags,
+				  CORE_TX_BD_DATA_VLAN_INSERTION, 1);
+
+		tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2;
+		tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+		tx_pkt.first_frag = buf->data_phys_addr +
+				    data->u.placement_offset;
+		tx_pkt.first_frag_len = data->length.packet_length;
+		tx_pkt.cookie = buf;
+
+		rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_syn_handle,
+					       &tx_pkt, true);
+
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Can't post SYN back to chip rc=%d\n", rc);
+			goto err;
+		}
+		return;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Received syn on listening port\n");
+	/* There may be an open ep on this connection if this is a syn
+	 * retrasnmit... need to make sure there isn't...
+	 */
+	if (qed_iwarp_ep_exists(p_hwfn, &cm_info))
+		goto err;
+
+	ep = qed_iwarp_get_free_ep(p_hwfn);
+	if (!ep)
+		goto err;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ether_addr_copy(ep->remote_mac_addr, remote_mac_addr);
+	ether_addr_copy(ep->local_mac_addr, local_mac_addr);
+
+	memcpy(&ep->cm_info, &cm_info, sizeof(ep->cm_info));
+
+	if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN)
+		ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+	hdr_size = ((cm_info.ip_version == QED_TCP_IPV4) ? 40 : 60) +
+		   ts_hdr_size;
+	ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size;
+	ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+	ep->event_cb = listener->event_cb;
+	ep->cb_context = listener->cb_context;
+	ep->connect_mode = TCP_CONNECT_PASSIVE;
+
+	ep->syn = buf;
+	ep->syn_ip_payload_length = (u16)payload_len;
+	ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset +
+			   tcp_start_offset;
+
+	rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+	if (rc) {
+		qed_iwarp_return_ep(p_hwfn, ep);
+		goto err;
+	}
+
+	return;
+err:
+	qed_iwarp_ll2_post_rx(p_hwfn, buf, ll2_syn_handle);
+}
+
+static void qed_iwarp_ll2_rel_rx_pkt(void *cxt, u8 connection_handle,
+				     void *cookie, dma_addr_t rx_buf_addr,
+				     bool b_last_packet)
+{
+	struct qed_iwarp_ll2_buff *buffer = cookie;
+	struct qed_hwfn *p_hwfn = cxt;
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size,
+			  buffer->data, buffer->data_phys_addr);
+	kfree(buffer);
+}
+
+static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle,
+				      void *cookie, dma_addr_t first_frag_addr,
+				      bool b_last_fragment, bool b_last_packet)
+{
+	struct qed_iwarp_ll2_buff *buffer = cookie;
+	struct qed_hwfn *p_hwfn = cxt;
+
+	/* this was originally an rx packet, post it back */
+	qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle);
+}
+
+static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
+				     void *cookie, dma_addr_t first_frag_addr,
+				     bool b_last_fragment, bool b_last_packet)
+{
+	struct qed_iwarp_ll2_buff *buffer = cookie;
+	struct qed_hwfn *p_hwfn = cxt;
+
+	if (!buffer)
+		return;
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size,
+			  buffer->data, buffer->data_phys_addr);
+
+	kfree(buffer);
+}
+
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	int rc = 0;
+
+	if (iwarp_info->ll2_syn_handle != QED_IWARP_HANDLE_INVAL) {
+		rc = qed_ll2_terminate_connection(p_hwfn,
+						  iwarp_info->ll2_syn_handle);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to terminate syn connection\n");
+
+		qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_syn_handle);
+		iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+	}
+
+	qed_llh_remove_mac_filter(p_hwfn,
+				  p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
+	return rc;
+}
+
+static int
+qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
+			    int num_rx_bufs, int buff_size, u8 ll2_handle)
+{
+	struct qed_iwarp_ll2_buff *buffer;
+	int rc = 0;
+	int i;
+
+	for (i = 0; i < num_rx_bufs; i++) {
+		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		if (!buffer) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		buffer->data = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+						  buff_size,
+						  &buffer->data_phys_addr,
+						  GFP_KERNEL);
+		if (!buffer->data) {
+			kfree(buffer);
+			rc = -ENOMEM;
+			break;
+		}
+
+		buffer->buff_size = buff_size;
+		rc = qed_iwarp_ll2_post_rx(p_hwfn, buffer, ll2_handle);
+		if (rc)
+			/* buffers will be deallocated by qed_ll2 */
+			break;
+	}
+	return rc;
+}
+
+#define QED_IWARP_MAX_BUF_SIZE(mtu)				     \
+	ALIGN((mtu) + ETH_HLEN + 2 * VLAN_HLEN + 2 + ETH_CACHE_LINE_SIZE, \
+		ETH_CACHE_LINE_SIZE)
+
+static int
+qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
+		    struct qed_rdma_start_in_params *params,
+		    struct qed_ptt *p_ptt)
+{
+	struct qed_iwarp_info *iwarp_info;
+	struct qed_ll2_acquire_data data;
+	struct qed_ll2_cbs cbs;
+	int rc = 0;
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+
+	iwarp_info->max_mtu = params->max_mtu;
+
+	ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr);
+
+	rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+	if (rc)
+		return rc;
+
+	/* Start SYN connection */
+	cbs.rx_comp_cb = qed_iwarp_ll2_comp_syn_pkt;
+	cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
+	cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
+	cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+	cbs.cookie = p_hwfn;
+
+	memset(&data, 0, sizeof(data));
+	data.input.conn_type = QED_LL2_TYPE_IWARP;
+	data.input.mtu = QED_IWARP_MAX_SYN_PKT_SIZE;
+	data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE;
+	data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE;
+	data.input.tx_max_bds_per_packet = 1;	/* will never be fragmented */
+	data.input.tx_tc = PKT_LB_TC;
+	data.input.tx_dest = QED_LL2_TX_DEST_LB;
+	data.p_connection_handle = &iwarp_info->ll2_syn_handle;
+	data.cbs = &cbs;
+
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
+		qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+		return rc;
+	}
+
+	rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_syn_handle);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to establish LL2 connection\n");
+		goto err;
+	}
+
+	rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
+					 QED_IWARP_LL2_SYN_RX_SIZE,
+					 QED_IWARP_MAX_SYN_PKT_SIZE,
+					 iwarp_info->ll2_syn_handle);
+	if (rc)
+		goto err;
+
+	return rc;
+err:
+	qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		    struct qed_rdma_start_in_params *params)
+{
+	struct qed_iwarp_info *iwarp_info;
+	u32 rcv_wnd_size;
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	iwarp_info->tcp_flags = QED_IWARP_TS_EN;
+	rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF;
+
+	/* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */
+	iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) -
+	    ilog2(QED_IWARP_RCV_WND_SIZE_MIN);
+	iwarp_info->crc_needed = QED_IWARP_PARAM_CRC_NEEDED;
+	iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+
+	iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
+
+	iwarp_info->rtr_type =  MPA_RTR_TYPE_ZERO_SEND |
+				MPA_RTR_TYPE_ZERO_WRITE |
+				MPA_RTR_TYPE_ZERO_READ;
+
+	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_list);
+	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.listen_list);
+
+	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
+				  qed_iwarp_async_event);
+
+	return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
+}
+
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	int rc;
+
+	qed_iwarp_free_prealloc_ep(p_hwfn);
+	rc = qed_iwarp_wait_for_all_cids(p_hwfn);
+	if (rc)
+		return rc;
+
+	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
+
+	return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+}
+
+void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
+			   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_ERROR, true);
+
+	params.event = QED_IWARP_EVENT_CLOSE;
+	params.ep_context = ep;
+	params.cm_info = &ep->cm_info;
+	params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ?
+			 0 : -ECONNRESET;
+
+	ep->state = QED_IWARP_EP_CLOSED;
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&ep->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
+void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
+				  struct qed_iwarp_ep *ep, int fw_ret_code)
+{
+	struct qed_iwarp_cm_event_params params;
+	bool event_cb = false;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n",
+		   ep->cid, fw_ret_code);
+
+	switch (fw_ret_code) {
+	case IWARP_EXCEPTION_DETECTED_LLP_CLOSED:
+		params.status = 0;
+		params.event = QED_IWARP_EVENT_DISCONNECT;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LLP_RESET:
+		params.status = -ECONNRESET;
+		params.event = QED_IWARP_EVENT_DISCONNECT;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_RQ_EMPTY:
+		params.event = QED_IWARP_EVENT_RQ_EMPTY;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_IRQ_FULL:
+		params.event = QED_IWARP_EVENT_IRQ_FULL;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT:
+		params.event = QED_IWARP_EVENT_LLP_TIMEOUT;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR:
+		params.event = QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW:
+		params.event = QED_IWARP_EVENT_CQ_OVERFLOW;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC:
+		params.event = QED_IWARP_EVENT_QP_CATASTROPHIC;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR:
+		params.event = QED_IWARP_EVENT_LOCAL_ACCESS_ERROR;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR:
+		params.event = QED_IWARP_EVENT_REMOTE_OPERATION_ERROR;
+		event_cb = true;
+		break;
+	case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED:
+		params.event = QED_IWARP_EVENT_TERMINATE_RECEIVED;
+		event_cb = true;
+		break;
+	default:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Unhandled exception received...fw_ret_code=%d\n",
+			   fw_ret_code);
+		break;
+	}
+
+	if (event_cb) {
+		params.ep_context = ep;
+		params.cm_info = &ep->cm_info;
+		ep->event_cb(ep->cb_context, &params);
+	}
+}
+
+static void
+qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn,
+				   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	memset(&params, 0, sizeof(params));
+	params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+	params.ep_context = ep;
+	params.cm_info = &ep->cm_info;
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	switch (fw_return_code) {
+	case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "%s(0x%x) TCP connect got invalid packet\n",
+			   QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_TCP_CONNECTION_RST:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "%s(0x%x) TCP Connection Reset\n",
+			   QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) TCP timeout\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -EBUSY;
+		break;
+	case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA not supported VER\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	default:
+		DP_ERR(p_hwfn,
+		       "%s(0x%x) Unexpected return code tcp connect: %d\n",
+		       QED_IWARP_CONNECT_MODE_STRING(ep),
+		       ep->tcp_cid, fw_return_code);
+		params.status = -ECONNRESET;
+		break;
+	}
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+		qed_iwarp_return_ep(p_hwfn, ep);
+	} else {
+		ep->event_cb(ep->cb_context, &params);
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_del(&ep->list_entry);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+}
+
+void
+qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
+			   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	u8 ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		/* Done with the SYN packet, post back to ll2 rx */
+		qed_iwarp_ll2_post_rx(p_hwfn, ep->syn, ll2_syn_handle);
+
+		ep->syn = NULL;
+
+		/* If connect failed - upper layer doesn't know about it */
+		if (fw_return_code == RDMA_RETURN_OK)
+			qed_iwarp_mpa_received(p_hwfn, ep);
+		else
+			qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep,
+							   fw_return_code);
+	} else {
+		if (fw_return_code == RDMA_RETURN_OK)
+			qed_iwarp_mpa_offload(p_hwfn, ep);
+		else
+			qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep,
+							   fw_return_code);
+	}
+}
+
+static inline bool
+qed_iwarp_check_ep_ok(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	if (!ep || (ep->sig != QED_EP_SIG)) {
+		DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep);
+		return false;
+	}
+
+	return true;
+}
+
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+				 u8 fw_event_code, u16 echo,
+				 union event_ring_data *data,
+				 u8 fw_return_code)
+{
+	struct regpair *fw_handle = &data->rdma_data.async_handle;
+	struct qed_iwarp_ep *ep = NULL;
+	u16 cid;
+
+	ep = (struct qed_iwarp_ep *)(uintptr_t)HILO_64(fw_handle->hi,
+						       fw_handle->lo);
+
+	switch (fw_event_code) {
+	case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE:
+		/* Async completion after TCP 3-way handshake */
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n",
+			   ep->tcp_cid, fw_return_code);
+		qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED:
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_exception_received(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE:
+		/* Async completion for Close Connection ramrod */
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_qp_in_error(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED:
+		/* Async event for active side only */
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_mpa_reply_arrived(p_hwfn, ep);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE:
+		if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+			return -EINVAL;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n",
+			   ep->cid, fw_return_code);
+		qed_iwarp_mpa_complete(p_hwfn, ep, fw_return_code);
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED:
+		cid = (u16)le32_to_cpu(fw_handle->lo);
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", cid);
+		qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+		break;
+	case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW:
+		DP_NOTICE(p_hwfn, "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n");
+
+		p_hwfn->p_rdma_info->events.affiliated_event(
+			p_hwfn->p_rdma_info->events.context,
+			QED_IWARP_EVENT_CQ_OVERFLOW,
+			(void *)fw_handle);
+		break;
+	default:
+		DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n",
+		       fw_event_code);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+			struct qed_iwarp_listen_in *iparams,
+			struct qed_iwarp_listen_out *oparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_listener *listener;
+
+	listener = kzalloc(sizeof(*listener), GFP_KERNEL);
+	if (!listener)
+		return -ENOMEM;
+
+	listener->ip_version = iparams->ip_version;
+	memcpy(listener->ip_addr, iparams->ip_addr, sizeof(listener->ip_addr));
+	listener->port = iparams->port;
+	listener->vlan = iparams->vlan;
+
+	listener->event_cb = iparams->event_cb;
+	listener->cb_context = iparams->cb_context;
+	listener->max_backlog = iparams->max_backlog;
+	oparams->handle = listener;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&listener->list_entry,
+		      &p_hwfn->p_rdma_info->iwarp.listen_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n",
+		   listener->event_cb,
+		   listener,
+		   listener->ip_addr[0],
+		   listener->ip_addr[1],
+		   listener->ip_addr[2],
+		   listener->ip_addr[3], listener->port, listener->vlan);
+
+	return 0;
+}
+
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle)
+{
+	struct qed_iwarp_listener *listener = handle;
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "handle=%p\n", handle);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&listener->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	kfree(listener);
+
+	return 0;
+}
+
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	struct qed_iwarp_ep *ep;
+	struct qed_rdma_qp *qp;
+	int rc;
+
+	ep = iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n");
+		return -EINVAL;
+	}
+
+	qp = ep->qp;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+		   qp->icid, ep->tcp_cid);
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_CB;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR,
+				 PROTOCOLID_IWARP, &init_data);
+
+	if (rc)
+		return rc;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = 0x%x\n", rc);
+
+	return rc;
+}
+
+void
+qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+		   struct qed_rdma_query_qp_out_params *out_params)
+{
+	out_params->state = qed_iwarp2roce_state(qp->iwarp_state);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
new file mode 100644
index 000000000000..148ef3c33a5d
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -0,0 +1,189 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_IWARP_H
+#define _QED_IWARP_H
+
+enum qed_iwarp_qp_state {
+	QED_IWARP_QP_STATE_IDLE,
+	QED_IWARP_QP_STATE_RTS,
+	QED_IWARP_QP_STATE_TERMINATE,
+	QED_IWARP_QP_STATE_CLOSING,
+	QED_IWARP_QP_STATE_ERROR,
+};
+
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
+
+#define QED_IWARP_PREALLOC_CNT  (256)
+
+#define QED_IWARP_LL2_SYN_TX_SIZE       (128)
+#define QED_IWARP_LL2_SYN_RX_SIZE       (256)
+#define QED_IWARP_MAX_SYN_PKT_SIZE      (128)
+#define QED_IWARP_HANDLE_INVAL			(0xff)
+
+struct qed_iwarp_ll2_buff {
+	void *data;
+	dma_addr_t data_phys_addr;
+	u32 buff_size;
+};
+
+struct qed_iwarp_info {
+	struct list_head listen_list;	/* qed_iwarp_listener */
+	struct list_head ep_list;	/* qed_iwarp_ep */
+	struct list_head ep_free_list;	/* pre-allocated ep's */
+	spinlock_t iw_lock;	/* for iwarp resources */
+	spinlock_t qp_lock;	/* for teardown races */
+	u32 rcv_wnd_scale;
+	u16 max_mtu;
+	u8 mac_addr[ETH_ALEN];
+	u8 crc_needed;
+	u8 tcp_flags;
+	u8 ll2_syn_handle;
+	u8 peer2peer;
+	enum mpa_negotiation_mode mpa_rev;
+	enum mpa_rtr_type rtr_type;
+};
+
+enum qed_iwarp_ep_state {
+	QED_IWARP_EP_INIT,
+	QED_IWARP_EP_MPA_REQ_RCVD,
+	QED_IWARP_EP_MPA_OFFLOADED,
+	QED_IWARP_EP_ESTABLISHED,
+	QED_IWARP_EP_CLOSED
+};
+
+union async_output {
+	struct iwarp_eqe_data_mpa_async_completion mpa_response;
+	struct iwarp_eqe_data_tcp_async_completion mpa_request;
+};
+
+#define QED_MAX_PRIV_DATA_LEN (512)
+struct qed_iwarp_ep_memory {
+	u8 in_pdata[QED_MAX_PRIV_DATA_LEN];
+	u8 out_pdata[QED_MAX_PRIV_DATA_LEN];
+	union async_output async_output;
+};
+
+/* Endpoint structure represents a TCP connection. This connection can be
+ * associated with a QP or not (in which case QP==NULL)
+ */
+struct qed_iwarp_ep {
+	struct list_head list_entry;
+	struct qed_rdma_qp *qp;
+	struct qed_iwarp_ep_memory *ep_buffer_virt;
+	dma_addr_t ep_buffer_phys;
+	enum qed_iwarp_ep_state state;
+	int sig;
+	struct qed_iwarp_cm_info cm_info;
+	enum tcp_connect_mode connect_mode;
+	enum mpa_rtr_type rtr_type;
+	enum mpa_negotiation_mode mpa_rev;
+	u32 tcp_cid;
+	u32 cid;
+	u16 mss;
+	u8 remote_mac_addr[6];
+	u8 local_mac_addr[6];
+	bool mpa_reply_processed;
+
+	/* For Passive side - syn packet related data */
+	u16 syn_ip_payload_length;
+	struct qed_iwarp_ll2_buff *syn;
+	dma_addr_t syn_phy_addr;
+
+	/* The event_cb function is called for asynchrounous events associated
+	 * with the ep. It is initialized at different entry points depending
+	 * on whether the ep is the tcp connection active side or passive side
+	 * The cb_context is passed to the event_cb function.
+	 */
+	iwarp_event_handler event_cb;
+	void *cb_context;
+};
+
+struct qed_iwarp_listener {
+	struct list_head list_entry;
+
+	/* The event_cb function is called for connection requests.
+	 * The cb_context is passed to the event_cb function.
+	 */
+	iwarp_event_handler event_cb;
+	void *cb_context;
+	u32 max_backlog;
+	u32 ip_addr[4];
+	u16 port;
+	u16 vlan;
+	u8 ip_version;
+};
+
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
+
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		    struct qed_rdma_start_in_params *params);
+
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+			struct qed_rdma_qp *qp,
+			struct qed_rdma_create_qp_out_params *out_params);
+
+int qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp,
+			enum qed_iwarp_qp_state new_state, bool internal);
+
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+void qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+			struct qed_rdma_query_qp_out_params *out_params);
+
+int
+qed_iwarp_connect(void *rdma_cxt,
+		  struct qed_iwarp_connect_in *iparams,
+		  struct qed_iwarp_connect_out *oparams);
+
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+			struct qed_iwarp_listen_in *iparams,
+			struct qed_iwarp_listen_out *oparams);
+
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams);
+
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams);
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle);
+
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e57699bfbdfa..0ba5ec8a9814 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -79,8 +79,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn)
 	unsigned long **pp_qids;
 	u32 i;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
-	    p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+	if (!QED_IS_L2_PERSONALITY(p_hwfn))
 		return 0;
 
 	p_l2_info = kzalloc(sizeof(*p_l2_info), GFP_KERNEL);
@@ -1228,19 +1227,6 @@ static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
 	return action;
 }
 
-static void qed_set_fw_mac_addr(__le16 *fw_msb,
-				__le16 *fw_mid,
-				__le16 *fw_lsb,
-				u8 *mac)
-{
-	((u8 *)fw_msb)[0] = mac[1];
-	((u8 *)fw_msb)[1] = mac[0];
-	((u8 *)fw_mid)[0] = mac[3];
-	((u8 *)fw_mid)[1] = mac[2];
-	((u8 *)fw_lsb)[0] = mac[5];
-	((u8 *)fw_lsb)[1] = mac[4];
-}
-
 static int
 qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
 			u16 opaque_fid,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 17f9b0a7b553..c06ad4f0758e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -309,7 +309,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 		list_del(&p_pkt->list_entry);
 		b_last_packet = list_empty(&p_tx->active_descq);
 		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
-		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
 			struct qed_ooo_buffer *p_buffer;
 
 			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -532,7 +532,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
 		list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
 			struct qed_ooo_buffer *p_buffer;
 
 			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -893,11 +893,11 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
 	p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
 	p_ramrod->queue_id = p_ll2_conn->queue_id;
-	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
-									  : 1;
+	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
 
 	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
-	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
+	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
+	    (conn_type != QED_LL2_TYPE_IWARP)) {
 		p_ramrod->mf_si_bcast_accept_all = 1;
 		p_ramrod->mf_si_mcast_accept_all = 1;
 	} else {
@@ -924,7 +924,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
 		return 0;
 
-	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
 		p_ll2_conn->tx_stats_en = 0;
 	else
 		p_ll2_conn->tx_stats_en = 1;
@@ -955,10 +955,10 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 
 	switch (p_ll2_conn->input.tx_tc) {
-	case LB_TC:
+	case PURE_LB_TC:
 		pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
 		break;
-	case OOO_LB_TC:
+	case PKT_LB_TC:
 		pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO);
 		break;
 	default:
@@ -973,12 +973,20 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->conn_type = PROTOCOLID_FCOE;
 		break;
 	case QED_LL2_TYPE_ISCSI:
-	case QED_LL2_TYPE_ISCSI_OOO:
 		p_ramrod->conn_type = PROTOCOLID_ISCSI;
 		break;
 	case QED_LL2_TYPE_ROCE:
 		p_ramrod->conn_type = PROTOCOLID_ROCE;
 		break;
+	case QED_LL2_TYPE_IWARP:
+		p_ramrod->conn_type = PROTOCOLID_IWARP;
+		break;
+	case QED_LL2_TYPE_OOO:
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+			p_ramrod->conn_type = PROTOCOLID_ISCSI;
+		else
+			p_ramrod->conn_type = PROTOCOLID_IWARP;
+		break;
 	default:
 		p_ramrod->conn_type = PROTOCOLID_ETH;
 		DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type);
@@ -1142,7 +1150,7 @@ qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
 	u16 buf_idx;
 	int rc = 0;
 
-	if (p_ll2_info->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_info->input.conn_type != QED_LL2_TYPE_OOO)
 		return rc;
 
 	/* Correct number of requested OOO buffers if needed */
@@ -1280,7 +1288,7 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
 		goto q_allocate_fail;
 
 	/* Register callbacks for the Rx/Tx queues */
-	if (data->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+	if (data->input.conn_type == QED_LL2_TYPE_OOO) {
 		comp_rx_cb = qed_ll2_lb_rxq_completion;
 		comp_tx_cb = qed_ll2_lb_txq_completion;
 	} else {
@@ -1339,7 +1347,7 @@ static void
 qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
 				 struct qed_ll2_info *p_ll2_conn)
 {
-	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
 		return;
 
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -1421,7 +1429,7 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 	if (rc)
 		goto out;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+	if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
 		qed_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1);
 
 	qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
@@ -1794,7 +1802,7 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
 		qed_ll2_rxq_flush(p_hwfn, connection_handle);
 	}
 
-	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
 		qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
 
 	if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
@@ -1816,7 +1824,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
 {
 	struct qed_ooo_buffer *p_buffer;
 
-	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+	if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
 		return;
 
 	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -2063,7 +2071,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
 	ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
 
 	if (lb) {
-		data->input.tx_tc = OOO_LB_TC;
+		data->input.tx_tc = PKT_LB_TC;
 		data->input.tx_dest = QED_LL2_TX_DEST_LB;
 	} else {
 		data->input.tx_tc = 0;
@@ -2080,7 +2088,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
 	int rc;
 
 	qed_ll2_set_conn_data(cdev, &data, params,
-			      QED_LL2_TYPE_ISCSI_OOO, handle, true);
+			      QED_LL2_TYPE_OOO, handle, true);
 
 	rc = qed_ll2_acquire_connection(hwfn, &data);
 	if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 16cc30b11cce..b11399606990 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -237,6 +237,8 @@ err0:
 int qed_fill_dev_info(struct qed_dev *cdev,
 		      struct qed_dev_info *dev_info)
 {
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_hw_info *hw_info = &p_hwfn->hw_info;
 	struct qed_tunnel_info *tun = &cdev->tunnel;
 	struct qed_ptt  *ptt;
 
@@ -260,11 +262,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 	dev_info->pci_mem_start = cdev->pci_params.mem_start;
 	dev_info->pci_mem_end = cdev->pci_params.mem_end;
 	dev_info->pci_irq = cdev->pci_params.irq;
-	dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
-				    QED_PCI_ETH_ROCE);
+	dev_info->rdma_supported = QED_IS_RDMA_PERSONALITY(p_hwfn);
 	dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
 	dev_info->dev_type = cdev->type;
-	ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
+	ether_addr_copy(dev_info->hw_mac, hw_info->hw_mac_addr);
 
 	if (IS_PF(cdev)) {
 		dev_info->fw_major = FW_MAJOR_VERSION;
@@ -274,8 +275,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 		dev_info->mf_mode = cdev->mf_mode;
 		dev_info->tx_switching = true;
 
-		if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
-		    QED_WOL_SUPPORT_PME)
+		if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
 			dev_info->wol_support = true;
 
 		dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
@@ -304,7 +304,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 				    &dev_info->mfw_rev, NULL);
 	}
 
-	dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+	dev_info->mtu = hw_info->mtu;
 
 	return 0;
 }
@@ -790,7 +790,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
 				       cdev->num_hwfns;
 
 	if (!IS_ENABLED(CONFIG_QED_RDMA) ||
-	    QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH_ROCE)
+	    !QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev)))
 		return 0;
 
 	for_each_hwfn(cdev, i)
@@ -931,8 +931,7 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 	/* In case we might support RDMA, don't allow qede to be greedy
 	 * with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn.
 	 */
-	if (QED_LEADING_HWFN(cdev)->hw_info.personality ==
-	    QED_PCI_ETH_ROCE) {
+	if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) {
 		u16 *num_cons;
 
 		num_cons = &params->eth_pf_params.num_cons;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index df76e212f86e..6fb99518a61f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -161,7 +161,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 	num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
 					       NULL);
 
-	p_rdma_info->num_qps = num_cons / 2;
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		p_rdma_info->num_qps = num_cons;
+	else
+		p_rdma_info->num_qps = num_cons / 2; /* 2 cids per qp */
 
 	num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE);
 
@@ -252,6 +255,13 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 			   "Failed to allocate real cid bitmap, rc = %d\n", rc);
 		goto free_cid_map;
 	}
+
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		rc = qed_iwarp_alloc(p_hwfn);
+
+	if (rc)
+		goto free_cid_map;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
 	return 0;
 
@@ -329,6 +339,9 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
 {
 	struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
 
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_resc_free(p_hwfn);
+
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1);
@@ -470,6 +483,9 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
 
 	if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
 		SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
+
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_init_devinfo(p_hwfn);
 }
 
 static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
@@ -490,29 +506,17 @@ static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
 
 static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-	u32 ll2_ethertype_en;
+	int rc = 0;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n");
 	p_hwfn->b_rdma_enabled_in_prs = false;
 
-	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
-
-	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
-
-	/* We delay writing to this reg until first cid is allocated. See
-	 * qed_cxt_dynamic_ilt_alloc function for more details
-	 */
-	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
-	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
-	       (ll2_ethertype_en | 0x01));
-
-	if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
-		DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
-		return -EINVAL;
-	}
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_init_hw(p_hwfn, p_ptt);
+	else
+		rc = qed_roce_init_hw(p_hwfn, p_ptt);
 
-	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
-	return 0;
+	return rc;
 }
 
 static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
@@ -544,7 +548,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma;
+	else
+		p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
 
 	p_params_header = &p_ramrod->params_header;
 	p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
@@ -641,7 +648,15 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	qed_roce_setup(p_hwfn);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+		if (rc)
+			return rc;
+	} else {
+		rc = qed_roce_setup(p_hwfn);
+		if (rc)
+			return rc;
+	}
 
 	return qed_rdma_start_fw(p_hwfn, params, p_ptt);
 }
@@ -675,7 +690,16 @@ int qed_rdma_stop(void *rdma_cxt)
 	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
 	       (ll2_ethertype_en & 0xFFFE));
 
-	qed_roce_stop(p_hwfn);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		rc = qed_iwarp_stop(p_hwfn, p_ptt);
+		if (rc) {
+			qed_ptt_release(p_hwfn, p_ptt);
+			return rc;
+		}
+	} else {
+		qed_roce_stop(p_hwfn);
+	}
+
 	qed_ptt_release(p_hwfn, p_ptt);
 
 	/* Get SPQ entry */
@@ -810,7 +834,9 @@ static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
 
 	memset(info, 0, sizeof(*info));
 
-	info->rdma_type = QED_RDMA_TYPE_ROCE;
+	info->rdma_type = QED_IS_ROCE_PERSONALITY(p_hwfn) ?
+	    QED_RDMA_TYPE_ROCE : QED_RDMA_TYPE_IWARP;
+
 	info->user_dpm_enabled = (p_hwfn->db_bar_no_edpm == 0);
 
 	qed_fill_dev_info(cdev, &info->common);
@@ -1112,7 +1138,7 @@ static int qed_rdma_query_qp(void *rdma_cxt,
 			     struct qed_rdma_query_qp_out_params *out_params)
 {
 	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-	int rc;
+	int rc = 0;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
@@ -1138,7 +1164,10 @@ static int qed_rdma_query_qp(void *rdma_cxt,
 	out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp;
 	out_params->sqd_async = qp->sqd_async;
 
-	rc = qed_roce_query_qp(p_hwfn, qp, out_params);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		qed_iwarp_query_qp(qp, out_params);
+	else
+		rc = qed_roce_query_qp(p_hwfn, qp, out_params);
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc);
 	return rc;
@@ -1151,7 +1180,10 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
-	rc = qed_roce_destroy_qp(p_hwfn, qp);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		rc = qed_iwarp_destroy_qp(p_hwfn, qp);
+	else
+		rc = qed_roce_destroy_qp(p_hwfn, qp);
 
 	/* free qp params struct */
 	kfree(qp);
@@ -1190,20 +1222,27 @@ qed_rdma_create_qp(void *rdma_cxt,
 		return NULL;
 	}
 
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		if (in_params->sq_num_pages * sizeof(struct regpair) >
+		    IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) {
+			DP_NOTICE(p_hwfn->cdev,
+				  "Sq num pages: %d exceeds maximum\n",
+				  in_params->sq_num_pages);
+			return NULL;
+		}
+		if (in_params->rq_num_pages * sizeof(struct regpair) >
+		    IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) {
+			DP_NOTICE(p_hwfn->cdev,
+				  "Rq num pages: %d exceeds maximum\n",
+				  in_params->rq_num_pages);
+			return NULL;
+		}
+	}
+
 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 	if (!qp)
 		return NULL;
 
-	rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
-	qp->qpid = ((0xFF << 16) | qp->icid);
-
-	DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid);
-
-	if (rc) {
-		kfree(qp);
-		return NULL;
-	}
-
 	qp->cur_state = QED_ROCE_QP_STATE_RESET;
 	qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi);
 	qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo);
@@ -1226,6 +1265,19 @@ qed_rdma_create_qp(void *rdma_cxt,
 	qp->e2e_flow_control_en = qp->use_srq ? false : true;
 	qp->stats_queue = in_params->stats_queue;
 
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		rc = qed_iwarp_create_qp(p_hwfn, qp, out_params);
+		qp->qpid = qp->icid;
+	} else {
+		rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
+		qp->qpid = ((0xFF << 16) | qp->icid);
+	}
+
+	if (rc) {
+		kfree(qp);
+		return NULL;
+	}
+
 	out_params->icid = qp->icid;
 	out_params->qp_id = qp->qpid;
 
@@ -1324,7 +1376,14 @@ static int qed_rdma_modify_qp(void *rdma_cxt,
 			   qp->cur_state);
 	}
 
-	rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		enum qed_iwarp_qp_state new_state =
+		    qed_roce2iwarp_state(qp->cur_state);
+
+		rc = qed_iwarp_modify_qp(p_hwfn, qp, new_state, 0);
+	} else {
+		rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc);
 	return rc;
@@ -1713,6 +1772,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
 	.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
 	.ll2_get_stats = &qed_ll2_get_stats,
+	.iwarp_connect = &qed_iwarp_connect,
+	.iwarp_create_listen = &qed_iwarp_create_listen,
+	.iwarp_destroy_listen = &qed_iwarp_destroy_listen,
+	.iwarp_accept = &qed_iwarp_accept,
+	.iwarp_reject = &qed_iwarp_reject,
+	.iwarp_send_rtr = &qed_iwarp_send_rtr,
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index d91e5c4069a6..18ec9cbd84f5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -42,6 +42,7 @@
 #include "qed.h"
 #include "qed_dev_api.h"
 #include "qed_hsi.h"
+#include "qed_iwarp.h"
 #include "qed_roce.h"
 
 #define QED_RDMA_MAX_FMR                    (RDMA_MAX_TIDS)
@@ -84,6 +85,7 @@ struct qed_rdma_info {
 	struct qed_bmap qp_map;
 	struct qed_bmap srq_map;
 	struct qed_bmap cid_map;
+	struct qed_bmap tcp_cid_map;
 	struct qed_bmap real_cid_map;
 	struct qed_bmap dpi_map;
 	struct qed_bmap toggle_bits;
@@ -97,6 +99,7 @@ struct qed_rdma_info {
 	u16 queue_zone_base;
 	u16 max_queue_zones;
 	enum protocol_type proto;
+	struct qed_iwarp_info iwarp;
 };
 
 struct qed_rdma_qp {
@@ -105,6 +108,7 @@ struct qed_rdma_qp {
 	u32 qpid;
 	u16 icid;
 	enum qed_roce_qp_state cur_state;
+	enum qed_iwarp_qp_state iwarp_state;
 	bool use_srq;
 	bool signal_all;
 	bool fmr_and_reserved_lkey;
@@ -164,6 +168,7 @@ struct qed_rdma_qp {
 
 	void *shared_queue;
 	dma_addr_t shared_queue_phys_addr;
+	struct qed_iwarp_ep *ep;
 };
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index e53adc3d009b..fb7c2d1562ae 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -1149,3 +1149,23 @@ int qed_roce_setup(struct qed_hwfn *p_hwfn)
 					 qed_roce_async_event);
 }
 
+int qed_roce_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 ll2_ethertype_en;
+
+	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
+
+	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+	       (ll2_ethertype_en | 0x01));
+
+	if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
+		DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
+	return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 56c95fb9a26d..ab4ad8a1e2a5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -104,12 +104,17 @@ union ramrod_data {
 	struct roce_query_qp_req_ramrod_data roce_query_qp_req;
 	struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp;
 	struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req;
+	struct roce_init_func_ramrod_data roce_init_func;
 	struct rdma_create_cq_ramrod_data rdma_create_cq;
 	struct rdma_destroy_cq_ramrod_data rdma_destroy_cq;
 	struct rdma_srq_create_ramrod_data rdma_create_srq;
 	struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
 	struct rdma_srq_modify_ramrod_data rdma_modify_srq;
-	struct roce_init_func_ramrod_data roce_init_func;
+	struct iwarp_create_qp_ramrod_data iwarp_create_qp;
+	struct iwarp_tcp_offload_ramrod_data iwarp_tcp_offload;
+	struct iwarp_mpa_offload_ramrod_data iwarp_mpa_offload;
+	struct iwarp_modify_qp_ramrod_data iwarp_modify_qp;
+	struct iwarp_init_func_ramrod_data iwarp_init_func;
 	struct fcoe_init_ramrod_params fcoe_init;
 	struct fcoe_conn_offload_ramrod_params fcoe_conn_ofld;
 	struct fcoe_conn_terminate_ramrod_params fcoe_conn_terminate;
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index bd0e3f157e9e..600e30e8f0be 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1409,8 +1409,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
 		*index = entry->index;
 		resolved = false;
 	} else if (removing) {
-		ofdpa_neigh_del(found);
 		*index = found->index;
+		ofdpa_neigh_del(found);
 	} else if (updating) {
 		ofdpa_neigh_update(found, NULL, false);
 		resolved = !is_zero_ether_addr(found->eth_dst);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index ad9c4ded2b90..761c518b2f92 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4172,7 +4172,7 @@ found:
 	 * recipients
 	 */
 	if (is_mc_recip) {
-		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 		unsigned int depth, i;
 
 		memset(inbuf, 0, sizeof(inbuf));
@@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 			efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
 		} else {
 			efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
-					       MC_CMD_FILTER_OP_IN_LEN,
+					       MC_CMD_FILTER_OP_EXT_IN_LEN,
 					       NULL, 0, rc);
 		}
 	}
@@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
 				      struct efx_filter_spec *spec)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	struct efx_filter_spec *saved_spec;
 	unsigned int hash, i, depth = 1;
 	bool replacing = false;
@@ -4940,7 +4940,7 @@ not_restored:
 static void efx_ef10_filter_table_remove(struct efx_nic *efx)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	struct efx_filter_spec *spec;
 	unsigned int filter_idx;
 	int rc;
@@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 
 	/* Insert/renew filters */
 	for (i = 0; i < addr_count; i++) {
+		EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
 		efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
 		efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
 		rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 				}
 				return rc;
 			} else {
-				/* mark as not inserted, and carry on */
-				rc = EFX_EF10_FILTER_ID_INVALID;
+				/* keep invalid ID, and carry on */
 			}
+		} else {
+			ids[i] = efx_ef10_filter_get_unsafe_id(rc);
 		}
-		ids[i] = efx_ef10_filter_get_unsafe_id(rc);
 	}
 
 	if (multicast && rollback) {
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index b9422450deb8..3df872f56289 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1301,7 +1301,7 @@ static void efx_mcdi_abandon(struct efx_nic *efx)
 	efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
 }
 
-/* Called from  falcon_process_eventq for MCDI events */
+/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */
 void efx_mcdi_process_event(struct efx_channel *channel,
 			    efx_qword_t *event)
 {
@@ -1389,8 +1389,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
 				MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_RC));
 		break;
 	default:
-		netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
-			  code);
+		netif_err(efx, hw, efx->net_dev,
+			  "Unknown MCDI event " EFX_QWORD_FMT "\n",
+			  EFX_QWORD_VAL(*event));
 	}
 }
 
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 0d230b125c6c..080428762858 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2485,7 +2485,7 @@ static int smc_drv_resume(struct device *dev)
 	return 0;
 }
 
-static struct dev_pm_ops smc_drv_pm_ops = {
+static const struct dev_pm_ops smc_drv_pm_ops = {
 	.suspend	= smc_drv_suspend,
 	.resume		= smc_drv_resume,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index fffd6d5fc907..6c2d1da05588 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 {
 	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
 	struct device_node *node = priv->device->of_node;
-	int ret;
+	int ret, phy_interface;
 	u32 reg, val;
 
 	regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
@@ -718,7 +718,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 	if (gmac->variant->support_rmii)
 		reg &= ~SYSCON_RMII_EN;
 
-	switch (priv->plat->interface) {
+	phy_interface = priv->plat->interface;
+	/* if PHY is internal, select the mode (xMII) used by the SoC */
+	if (gmac->use_internal_phy)
+		phy_interface = gmac->variant->internal_phy;
+	switch (phy_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		/* default */
 		break;
@@ -932,7 +936,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	}
 
 	plat_dat->interface = of_get_phy_mode(dev->of_node);
-	if (plat_dat->interface == gmac->variant->internal_phy) {
+	if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) {
 		dev_info(&pdev->dev, "Will use internal PHY\n");
 		gmac->use_internal_phy = true;
 		gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 471a9aa6ac94..22cf6353ba04 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -205,8 +205,8 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 {
 	int i;
 
-	for (i = 0; i < 22; i++)
-		if ((i < 9) || (i > 17))
+	for (i = 0; i < 23; i++)
+		if ((i < 12) || (i > 17))
 			reg_space[DMA_BUS_MODE / 4 + i] =
 				readl(ioaddr + DMA_BUS_MODE + i * 4);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 743170d57f62..babb39c646ff 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -29,7 +29,7 @@
 #include "stmmac.h"
 #include "dwmac_dma.h"
 
-#define REG_SPACE_SIZE	0x1054
+#define REG_SPACE_SIZE	0x1060
 #define MAC100_ETHTOOL_NAME	"st_mac100"
 #define GMAC_ETHTOOL_NAME	"st_gmac"
 
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index 1562ab4151e1..56ba411421f0 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
 	if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
 
-	if (of_machine_is_compatible("ti,am4372"))
+	if (of_machine_is_compatible("ti,am43"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
 
 	if (of_machine_is_compatible("ti,dra7"))
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b7a0f5eeab62..1850e348f555 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1236,6 +1236,7 @@ static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
 {
 	struct cpsw_common *cpsw = priv->cpsw;
 
+	skb_tx_timestamp(skb);
 	return cpdma_chan_submit(txch, skb, skb->data, skb->len,
 				 priv->emac_port + cpsw->data.dual_emac);
 }
@@ -1597,6 +1598,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpts *cpts = cpsw->cpts;
 	struct netdev_queue *txq;
 	struct cpdma_chan *txch;
 	int ret, q_idx;
@@ -1608,11 +1610,9 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 	}
 
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-	    cpts_is_tx_enabled(cpsw->cpts))
+	    cpts_is_tx_enabled(cpts) && cpts_can_timestamp(cpts, skb))
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
-	skb_tx_timestamp(skb);
-
 	q_idx = skb_get_queue_mapping(skb);
 	if (q_idx >= cpsw->tx_ch_num)
 		q_idx = q_idx % cpsw->tx_ch_num;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index c96eca2b1b46..01ea82ba9cdc 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -30,6 +30,7 @@
 #include <linux/of.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/skbuff.h>
+#include <linux/ptp_classify.h>
 #include <linux/timecounter.h>
 
 struct cpsw_cpts {
@@ -155,6 +156,16 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
 	return !!cpts->tx_enable;
 }
 
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+	unsigned int class = ptp_classify_raw(skb);
+
+	if (class == PTP_CLASS_NONE)
+		return false;
+
+	return true;
+}
+
 #else
 struct cpts;
 
@@ -203,6 +214,11 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
 {
 	return false;
 }
+
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+	return false;
+}
 #endif
 
 
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 0847a8f48cfe..28cb38af1a34 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2503,24 +2503,8 @@ static bool gbe_need_txtstamp(struct gbe_intf *gbe_intf,
 			      const struct netcp_packet *p_info)
 {
 	struct sk_buff *skb = p_info->skb;
-	unsigned int class = ptp_classify_raw(skb);
 
-	if (class == PTP_CLASS_NONE)
-		return false;
-
-	switch (class) {
-	case PTP_CLASS_V1_IPV4:
-	case PTP_CLASS_V1_IPV6:
-	case PTP_CLASS_V2_IPV4:
-	case PTP_CLASS_V2_IPV6:
-	case PTP_CLASS_V2_L2:
-	case (PTP_CLASS_V2_VLAN | PTP_CLASS_L2):
-	case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV4):
-	case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV6):
-		return true;
-	}
-
-	return false;
+	return cpts_can_timestamp(gbe_intf->gbe_dev->cpts, skb);
 }
 
 static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index eb77201cb718..de8156c6b292 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -45,9 +45,17 @@ struct geneve_net {
 
 static unsigned int geneve_net_id;
 
+struct geneve_dev_node {
+	struct hlist_node hlist;
+	struct geneve_dev *geneve;
+};
+
 /* Pseudo network device */
 struct geneve_dev {
-	struct hlist_node  hlist;	/* vni hash table */
+	struct geneve_dev_node hlist4;	/* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+	struct geneve_dev_node hlist6;	/* vni hash table for IPv6 socket */
+#endif
 	struct net	   *net;	/* netns for packet i/o */
 	struct net_device  *dev;	/* netdev for geneve tunnel */
 	struct ip_tunnel_info info;
@@ -123,16 +131,16 @@ static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
 					__be32 addr, u8 vni[])
 {
 	struct hlist_head *vni_list_head;
-	struct geneve_dev *geneve;
+	struct geneve_dev_node *node;
 	__u32 hash;
 
 	/* Find the device for this VNI */
 	hash = geneve_net_vni_hash(vni);
 	vni_list_head = &gs->vni_list[hash];
-	hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
-		if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
-		    addr == geneve->info.key.u.ipv4.dst)
-			return geneve;
+	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
+		if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
+		    addr == node->geneve->info.key.u.ipv4.dst)
+			return node->geneve;
 	}
 	return NULL;
 }
@@ -142,16 +150,16 @@ static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
 					 struct in6_addr addr6, u8 vni[])
 {
 	struct hlist_head *vni_list_head;
-	struct geneve_dev *geneve;
+	struct geneve_dev_node *node;
 	__u32 hash;
 
 	/* Find the device for this VNI */
 	hash = geneve_net_vni_hash(vni);
 	vni_list_head = &gs->vni_list[hash];
-	hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
-		if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
-		    ipv6_addr_equal(&addr6, &geneve->info.key.u.ipv6.dst))
-			return geneve;
+	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
+		if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
+		    ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
+			return node->geneve;
 	}
 	return NULL;
 }
@@ -591,6 +599,7 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
 {
 	struct net *net = geneve->net;
 	struct geneve_net *gn = net_generic(net, geneve_net_id);
+	struct geneve_dev_node *node;
 	struct geneve_sock *gs;
 	__u8 vni[3];
 	__u32 hash;
@@ -609,15 +618,20 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
 out:
 	gs->collect_md = geneve->collect_md;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (ipv6)
+	if (ipv6) {
 		rcu_assign_pointer(geneve->sock6, gs);
-	else
+		node = &geneve->hlist6;
+	} else
 #endif
+	{
 		rcu_assign_pointer(geneve->sock4, gs);
+		node = &geneve->hlist4;
+	}
+	node->geneve = geneve;
 
 	tunnel_id_to_vni(geneve->info.key.tun_id, vni);
 	hash = geneve_net_vni_hash(vni);
-	hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
+	hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
 	return 0;
 }
 
@@ -644,8 +658,10 @@ static int geneve_stop(struct net_device *dev)
 {
 	struct geneve_dev *geneve = netdev_priv(dev);
 
-	if (!hlist_unhashed(&geneve->hlist))
-		hlist_del_rcu(&geneve->hlist);
+	hlist_del_init_rcu(&geneve->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+	hlist_del_init_rcu(&geneve->hlist6.hlist);
+#endif
 	geneve_sock_release(geneve);
 	return 0;
 }
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 991372150463..63c98bbbc596 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -752,7 +752,7 @@ static int netvsc_set_channels(struct net_device *net,
 	    channels->rx_count || channels->tx_count || channels->other_count)
 		return -EINVAL;
 
-	if (count > net->num_tx_queues || count > net->num_rx_queues)
+	if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
 		return -EINVAL;
 
 	if (!nvdev || nvdev->destroy)
@@ -1179,7 +1179,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
 	rndis_dev = ndev->extension;
 	if (indir) {
 		for (i = 0; i < ITAB_NUM; i++)
-			if (indir[i] >= dev->num_rx_queues)
+			if (indir[i] >= VRSS_CHANNEL_MAX)
 				return -EINVAL;
 
 		for (i = 0; i < ITAB_NUM; i++)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9ffff0362a11..0f581ee74fe4 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,16 +39,20 @@
 #define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
 #define MACVLAN_BC_QUEUE_LEN	1000
 
+#define MACVLAN_F_PASSTHRU	1
+#define MACVLAN_F_ADDRCHANGE	2
+
 struct macvlan_port {
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
 	struct sk_buff_head	bc_queue;
 	struct work_struct	bc_work;
-	bool 			passthru;
+	u32			flags;
 	int			count;
 	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
 	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+	unsigned char           perm_addr[ETH_ALEN];
 };
 
 struct macvlan_source_entry {
@@ -66,6 +70,31 @@ struct macvlan_skb_cb {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+	return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+	port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+	return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+	port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+	port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
 /* Hash Ethernet address */
 static u32 macvlan_eth_hash(const unsigned char *addr)
 {
@@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
 static bool macvlan_addr_busy(const struct macvlan_port *port,
 			      const unsigned char *addr)
 {
-	/* Test to see if the specified multicast address is
+	/* Test to see if the specified address is
 	 * currently in use by the underlying device or
 	 * another macvlan.
 	 */
-	if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+	if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+	    ether_addr_equal_64bits(port->dev->dev_addr, addr))
 		return true;
 
 	if (macvlan_hash_lookup(port, addr))
@@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 	}
 
 	macvlan_forward_source(skb, port, eth->h_source);
-	if (port->passthru)
+	if (macvlan_passthru(port))
 		vlan = list_first_or_null_rcu(&port->vlans,
 					      struct macvlan_dev, list);
 	else
@@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev)
 	struct net_device *lowerdev = vlan->lowerdev;
 	int err;
 
-	if (vlan->port->passthru) {
+	if (macvlan_passthru(vlan->port)) {
 		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
 			err = dev_set_promiscuity(lowerdev, 1);
 			if (err < 0)
@@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev)
 	dev_uc_unsync(lowerdev, dev);
 	dev_mc_unsync(lowerdev, dev);
 
-	if (vlan->port->passthru) {
+	if (macvlan_passthru(vlan->port)) {
 		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
 			dev_set_promiscuity(lowerdev, -1);
 		goto hash_del;
@@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
+	struct macvlan_port *port = vlan->port;
 	int err;
 
 	if (!(dev->flags & IFF_UP)) {
@@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 		if (macvlan_addr_busy(vlan->port, addr))
 			return -EBUSY;
 
-		if (!vlan->port->passthru) {
+		if (!macvlan_passthru(port)) {
 			err = dev_uc_add(lowerdev, addr);
 			if (err)
 				return err;
@@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 
 		macvlan_hash_change_addr(vlan, addr);
 	}
+	if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+		/* Since addr_change isn't set, we are here due to lower
+		 * device change.  Save the lower-dev address so we can
+		 * restore it later.
+		 */
+		ether_addr_copy(vlan->port->perm_addr,
+				lowerdev->dev_addr);
+	}
+	macvlan_clear_addr_change(port);
 	return 0;
 }
 
@@ -703,8 +743,14 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	if (vlan->mode == MACVLAN_MODE_PASSTHRU)
+	/* If the addresses are the same, this is a no-op */
+	if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+		return 0;
+
+	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+		macvlan_set_addr_change(vlan->port);
 		return dev_set_mac_address(vlan->lowerdev, addr);
+	}
 
 	return macvlan_sync_address(dev, addr->sa_data);
 }
@@ -926,7 +972,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	/* Support unicast filter only on passthru devices.
 	 * Multicast filter should be allowed on all devices.
 	 */
-	if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+	if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
 		return -EOPNOTSUPP;
 
 	if (flags & NLM_F_REPLACE)
@@ -950,7 +996,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 	/* Support unicast filter only on passthru devices.
 	 * Multicast filter should be allowed on all devices.
 	 */
-	if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+	if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
 		return -EOPNOTSUPP;
 
 	if (is_unicast_ether_addr(addr))
@@ -1118,8 +1164,8 @@ static int macvlan_port_create(struct net_device *dev)
 	if (port == NULL)
 		return -ENOMEM;
 
-	port->passthru = false;
 	port->dev = dev;
+	ether_addr_copy(port->perm_addr, dev->dev_addr);
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
@@ -1159,6 +1205,18 @@ static void macvlan_port_destroy(struct net_device *dev)
 		kfree_skb(skb);
 	}
 
+	/* If the lower device address has been changed by passthru
+	 * macvlan, put it back.
+	 */
+	if (macvlan_passthru(port) &&
+	    !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+		struct sockaddr sa;
+
+		sa.sa_family = port->dev->type;
+		memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+		dev_set_mac_address(port->dev, &sa);
+	}
+
 	kfree(port);
 }
 
@@ -1325,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	port = macvlan_port_get_rtnl(lowerdev);
 
 	/* Only 1 macvlan device can be created in passthru mode */
-	if (port->passthru) {
+	if (macvlan_passthru(port)) {
 		/* The macvlan port must be not created this time,
 		 * still goto destroy_macvlan_port for readability.
 		 */
@@ -1351,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			err = -EINVAL;
 			goto destroy_macvlan_port;
 		}
-		port->passthru = true;
+		macvlan_set_passthru(port);
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
@@ -1435,7 +1493,7 @@ static int macvlan_changelink(struct net_device *dev,
 	if (data && data[IFLA_MACVLAN_FLAGS]) {
 		__u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 		bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
-		if (vlan->port->passthru && promisc) {
+		if (macvlan_passthru(vlan->port) && promisc) {
 			int err;
 
 			if (flags & MACVLAN_FLAG_NOPROMISC)
@@ -1598,7 +1656,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 		}
 		break;
 	case NETDEV_CHANGEADDR:
-		if (!port->passthru)
+		if (!macvlan_passthru(port))
 			return NOTIFY_DONE;
 
 		vlan = list_first_entry_or_null(&port->vlans,
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index ed0d10f54f26..c3065236ffcc 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640,
 	if (overflow) {
 		pr_debug("tx timestamp queue overflow, count %d\n", overflow);
 		while (skb) {
-			skb_complete_tx_timestamp(skb, NULL);
+			kfree_skb(skb);
 			skb = skb_dequeue(&dp83640->tx_queue);
 		}
 		return;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 8400403b3f62..5d314f143aea 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2171,6 +2171,7 @@ static struct phy_driver marvell_drivers[] = {
 		.get_sset_count = marvell_get_sset_count,
 		.get_strings = marvell_get_strings,
 		.get_stats = marvell_get_stats,
+		.set_loopback = genphy_loopback,
 	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E1540,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 9365b0792309..fdb43dd9b5cd 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -620,6 +620,8 @@ static int ksz9031_read_status(struct phy_device *phydev)
 	if ((regval & 0xFF) == 0xFF) {
 		phy_init_hw(phydev);
 		phydev->link = 0;
+		if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+			phydev->drv->config_intr(phydev);
 	}
 
 	return 0;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index acf00f071c9a..1790f7fec125 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1136,6 +1136,39 @@ int phy_resume(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_resume);
 
+int phy_loopback(struct phy_device *phydev, bool enable)
+{
+	struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
+	int ret = 0;
+
+	mutex_lock(&phydev->lock);
+
+	if (enable && phydev->loopback_enabled) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (!enable && !phydev->loopback_enabled) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (phydev->drv && phydrv->set_loopback)
+		ret = phydrv->set_loopback(phydev, enable);
+	else
+		ret = -EOPNOTSUPP;
+
+	if (ret)
+		goto out;
+
+	phydev->loopback_enabled = enable;
+
+out:
+	mutex_unlock(&phydev->lock);
+	return ret;
+}
+EXPORT_SYMBOL(phy_loopback);
+
 /* Generic PHY support and helper functions */
 
 /**
@@ -1584,6 +1617,23 @@ int genphy_resume(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_resume);
 
+int genphy_loopback(struct phy_device *phydev, bool enable)
+{
+	int value;
+
+	value = phy_read(phydev, MII_BMCR);
+	if (value < 0)
+		return value;
+
+	if (enable)
+		value |= BMCR_LOOPBACK;
+	else
+		value &= ~BMCR_LOOPBACK;
+
+	return phy_write(phydev, MII_BMCR, value);
+}
+EXPORT_SYMBOL(genphy_loopback);
+
 static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
 	/* The default values for phydev->supported are provided by the PHY
@@ -1829,6 +1879,7 @@ static struct phy_driver genphy_driver = {
 	.read_status	= genphy_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
+	.set_loopback   = genphy_loopback,
 };
 
 static int __init phy_init(void)
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 300bb1479b3a..e9f101c9bae2 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -201,7 +201,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 				rionet_queue_tx_msg(skb, ndev,
 					nets[rnet->mport->id].active[i]);
 				if (count)
-					atomic_inc(&skb->users);
+					refcount_inc(&skb->users);
 				count++;
 			}
 	} else if (RIONET_MAC_MATCH(eth->h_dest)) {
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 793ce900dffa..f32261ecd215 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1725,6 +1725,18 @@ static const struct driver_info lenovo_info = {
 	.tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info belkin_info = {
+	.description = "Belkin USB Ethernet Adapter",
+	.bind	= ax88179_bind,
+	.unbind = ax88179_unbind,
+	.status = ax88179_status,
+	.link_reset = ax88179_link_reset,
+	.reset	= ax88179_reset,
+	.flags	= FLAG_ETHER | FLAG_FRAMING_AX,
+	.rx_fixup = ax88179_rx_fixup,
+	.tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct usb_device_id products[] = {
 {
 	/* ASIX AX88179 10/100/1000 */
@@ -1754,6 +1766,10 @@ static const struct usb_device_id products[] = {
 	/* Lenovo OneLinkDock Gigabit LAN */
 	USB_DEVICE(0x17ef, 0x304b),
 	.driver_info = (unsigned long)&lenovo_info,
+}, {
+	/* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+	USB_DEVICE(0x050d, 0x0128),
+	.driver_info = (unsigned long)&belkin_info,
 },
 	{ },
 };
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 18fa45fc979b..7220cd620717 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -643,6 +643,13 @@ static const struct usb_device_id mbim_devs[] = {
 	  .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
 	},
 
+	/* The HP lt4132 (03f0:a31d) is a rebranded Huawei ME906s-158,
+	 * therefore it too requires the above "NDP to end" quirk.
+	 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
+	},
+
 	/* Telit LE922A6 in MBIM composition */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1041, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2067743f51ca..d103a1d4fb36 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -89,6 +89,8 @@ static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
 	CDC_NCM_SIMPLE_STAT(rx_ntbs),
 };
 
+#define CDC_NCM_LOW_MEM_MAX_CNT 10
+
 static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
 {
 	switch (sset) {
@@ -1055,10 +1057,10 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 
 	/* align new NDP */
 	if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END))
-		cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+		cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
 
 	/* verify that there is room for the NDP and the datagram (reserve) */
-	if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
+	if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size)
 		return NULL;
 
 	/* link to it */
@@ -1111,13 +1113,41 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
 	/* allocate a new OUT skb */
 	if (!skb_out) {
-		skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC);
+		if (ctx->tx_low_mem_val == 0) {
+			ctx->tx_curr_size = ctx->tx_max;
+			skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+			/* If the memory allocation fails we will wait longer
+			 * each time before attempting another full size
+			 * allocation again to not overload the system
+			 * further.
+			 */
+			if (skb_out == NULL) {
+				ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
+							      (unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
+				ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
+			}
+		}
 		if (skb_out == NULL) {
-			if (skb != NULL) {
-				dev_kfree_skb_any(skb);
-				dev->net->stats.tx_dropped++;
+			/* See if a very small allocation is possible.
+			 * We will send this packet immediately and hope
+			 * that there is more memory available later.
+			 */
+			if (skb)
+				ctx->tx_curr_size = max(skb->len,
+					(u32)USB_CDC_NCM_NTB_MIN_OUT_SIZE);
+			else
+				ctx->tx_curr_size = USB_CDC_NCM_NTB_MIN_OUT_SIZE;
+			skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+
+			/* No allocation possible so we will abort */
+			if (skb_out == NULL) {
+				if (skb != NULL) {
+					dev_kfree_skb_any(skb);
+					dev->net->stats.tx_dropped++;
+				}
+				goto exit_no_skb;
 			}
-			goto exit_no_skb;
+			ctx->tx_low_mem_val--;
 		}
 		/* fill out the initial 16-bit NTB header */
 		nth16 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth16));
@@ -1148,10 +1178,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ndp16 = cdc_ncm_ndp(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder);
 
 		/* align beginning of next frame */
-		cdc_ncm_align_tail(skb_out,  ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max);
+		cdc_ncm_align_tail(skb_out,  ctx->tx_modulus, ctx->tx_remainder, ctx->tx_curr_size);
 
 		/* check if we had enough room left for both NDP and frame */
-		if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) {
+		if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_curr_size) {
 			if (n == 0) {
 				/* won't fit, MTU problem? */
 				dev_kfree_skb_any(skb);
@@ -1227,7 +1257,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	/* If requested, put NDP at end of frame. */
 	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
 		nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
-		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+		cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
 		nth16->wNdpIndex = cpu_to_le16(skb_out->len);
 		skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size);
 
@@ -1246,9 +1276,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 */
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
 	    skb_out->len > ctx->min_tx_pkt) {
-		padding_count = ctx->tx_max - skb_out->len;
+		padding_count = ctx->tx_curr_size - skb_out->len;
 		skb_put_zero(skb_out, padding_count);
-	} else if (skb_out->len < ctx->tx_max &&
+	} else if (skb_out->len < ctx->tx_curr_size &&
 		   (skb_out->len % dev->maxpacket) == 0) {
 		skb_put_u8(skb_out, 0);	/* force short packet */
 	}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b33553b1e19c..f5438d0978ca 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -385,7 +385,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		tbp = tb;
 	}
 
-	if (tbp[IFLA_IFNAME]) {
+	if (ifmp && tbp[IFLA_IFNAME]) {
 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
 		name_assign_type = NET_NAME_USER;
 	} else {
@@ -404,7 +404,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		return PTR_ERR(peer);
 	}
 
-	if (tbp[IFLA_ADDRESS] == NULL)
+	if (!ifmp || !tbp[IFLA_ADDRESS])
 		eth_hw_addr_random(peer);
 
 	if (ifmp && (dev->ifindex != 0))
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5c6388fb7dd1..2e69bcdc5b07 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1802,6 +1802,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 	flush_work(&vi->config_work);
 
 	netif_device_detach(vi->dev);
+	netif_tx_disable(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
 	if (netif_running(vi->dev)) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0dafd8e6c665..b04e103350fb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -229,25 +229,25 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
 static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
 					   __be32 vni)
 {
-	struct vxlan_dev *vxlan;
+	struct vxlan_dev_node *node;
 
 	/* For flow based devices, map all packets to VNI 0 */
 	if (vs->flags & VXLAN_F_COLLECT_METADATA)
 		vni = 0;
 
-	hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
-		if (vxlan->default_dst.remote_vni != vni)
+	hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
+		if (node->vxlan->default_dst.remote_vni != vni)
 			continue;
 
 		if (IS_ENABLED(CONFIG_IPV6)) {
-			const struct vxlan_config *cfg = &vxlan->cfg;
+			const struct vxlan_config *cfg = &node->vxlan->cfg;
 
 			if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
 			    cfg->remote_ifindex != ifindex)
 				continue;
 		}
 
-		return vxlan;
+		return node->vxlan;
 	}
 
 	return NULL;
@@ -2387,17 +2387,22 @@ static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 
 	spin_lock(&vn->sock_lock);
-	hlist_del_init_rcu(&vxlan->hlist);
+	hlist_del_init_rcu(&vxlan->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+	hlist_del_init_rcu(&vxlan->hlist6.hlist);
+#endif
 	spin_unlock(&vn->sock_lock);
 }
 
-static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
+static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
+			     struct vxlan_dev_node *node)
 {
 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 	__be32 vni = vxlan->default_dst.remote_vni;
 
+	node->vxlan = vxlan;
 	spin_lock(&vn->sock_lock);
-	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+	hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
 	spin_unlock(&vn->sock_lock);
 }
 
@@ -2656,7 +2661,6 @@ static void vxlan_setup(struct net_device *dev)
 	vxlan->age_timer.data = (unsigned long) vxlan;
 
 	vxlan->dev = dev;
-	vxlan->net = dev_net(dev);
 
 	gro_cells_init(&vxlan->gro_cells, dev);
 
@@ -2727,7 +2731,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
 	}
 
 	if (tb[IFLA_MTU]) {
-		u32 mtu = nla_get_u32(data[IFLA_MTU]);
+		u32 mtu = nla_get_u32(tb[IFLA_MTU]);
 
 		if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU)
 			return -EINVAL;
@@ -2850,6 +2854,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
 {
 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 	struct vxlan_sock *vs = NULL;
+	struct vxlan_dev_node *node;
 
 	if (!vxlan->cfg.no_share) {
 		spin_lock(&vn->sock_lock);
@@ -2867,12 +2872,16 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
 	if (IS_ERR(vs))
 		return PTR_ERR(vs);
 #if IS_ENABLED(CONFIG_IPV6)
-	if (ipv6)
+	if (ipv6) {
 		rcu_assign_pointer(vxlan->vn6_sock, vs);
-	else
+		node = &vxlan->hlist6;
+	} else
 #endif
+	{
 		rcu_assign_pointer(vxlan->vn4_sock, vs);
-	vxlan_vs_add_dev(vs, vxlan);
+		node = &vxlan->hlist4;
+	}
+	vxlan_vs_add_dev(vs, vxlan, node);
 	return 0;
 }
 
@@ -3028,7 +3037,9 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
 
 static void vxlan_config_apply(struct net_device *dev,
 			       struct vxlan_config *conf,
-			       struct net_device *lowerdev, bool changelink)
+			       struct net_device *lowerdev,
+			       struct net *src_net,
+			       bool changelink)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_rdst *dst = &vxlan->default_dst;
@@ -3044,6 +3055,8 @@ static void vxlan_config_apply(struct net_device *dev,
 
 		if (conf->mtu)
 			dev->mtu = conf->mtu;
+
+		vxlan->net = src_net;
 	}
 
 	dst->remote_vni = conf->vni;
@@ -3086,7 +3099,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	if (ret)
 		return ret;
 
-	vxlan_config_apply(dev, conf, lowerdev, changelink);
+	vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
 
 	return 0;
 }
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 530586be05b4..5b1d2e8402d9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned long   remaining_credit;
 	struct timer_list credit_timeout;
 	u64 credit_window_start;
+	bool rate_limited;
 
 	/* Statistics */
 	struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 8397f6c92451..e322a862ddfe 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
-		xenvif_napi_schedule_or_enable_events(queue);
+		/* If the queue is rate-limited, it shall be
+		 * rescheduled in the timer callback.
+		 */
+		if (likely(!queue->rate_limited))
+			xenvif_napi_schedule_or_enable_events(queue);
 	}
 
 	return work_done;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 602d408fa25e..5042ff8d449a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 
 	queue->remaining_credit = min(max_credit, max_burst);
+	queue->rate_limited = false;
 }
 
 void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 		msecs_to_jiffies(queue->credit_usec / 1000);
 
 	/* Timer could already be pending in rare cases. */
-	if (timer_pending(&queue->credit_timeout))
+	if (timer_pending(&queue->credit_timeout)) {
+		queue->rate_limited = true;
 		return true;
+	}
 
 	/* Passed the point where we can replenish credit? */
 	if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 		mod_timer(&queue->credit_timeout,
 			  next_credit);
 		queue->credit_window_start = next_credit;
+		queue->rate_limited = true;
 
 		return true;
 	}
diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index c4208487fadc..b065eb605215 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -7,7 +7,7 @@ menu "Near Field Communication (NFC) devices"
 
 config NFC_TRF7970A
 	tristate "Texas Instruments TRF7970a NFC driver"
-	depends on SPI && NFC_DIGITAL
+	depends on SPI && NFC_DIGITAL && GPIOLIB
 	help
 	  This option enables the NFC driver for Texas Instruments' TRF7970a
 	  device. Such device supports 5 different protocols: ISO14443A,
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index badd8167ac73..ec50027b0d8b 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -749,11 +749,9 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
 	u32 protocols;
 	int r;
 
-	info = kzalloc(sizeof(struct fdp_nci_info), GFP_KERNEL);
-	if (!info) {
-		r = -ENOMEM;
-		goto err_info_alloc;
-	}
+	info = devm_kzalloc(dev, sizeof(struct fdp_nci_info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
 
 	info->phy = phy;
 	info->phy_ops = phy_ops;
@@ -775,8 +773,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
 				   tx_tailroom);
 	if (!ndev) {
 		nfc_err(dev, "Cannot allocate nfc ndev\n");
-		r = -ENOMEM;
-		goto err_alloc_ndev;
+		return -ENOMEM;
 	}
 
 	r = nci_register_device(ndev);
@@ -792,9 +789,6 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
 
 err_regdev:
 	nci_free_device(ndev);
-err_alloc_ndev:
-	kfree(info);
-err_info_alloc:
 	return r;
 }
 EXPORT_SYMBOL(fdp_nci_probe);
@@ -808,7 +802,6 @@ void fdp_nci_remove(struct nci_dev *ndev)
 
 	nci_unregister_device(ndev);
 	nci_free_device(ndev);
-	kfree(info);
 }
 EXPORT_SYMBOL(fdp_nci_remove);
 
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index e0baec848ff2..c4da50e07bbc 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -27,7 +27,6 @@
 
 #define FDP_I2C_DRIVER_NAME	"fdp_nci_i2c"
 
-#define FDP_DP_POWER_GPIO_NAME	"power"
 #define FDP_DP_CLOCK_TYPE_NAME	"clock-type"
 #define FDP_DP_CLOCK_FREQ_NAME	"clock-freq"
 #define FDP_DP_FW_VSC_CFG_NAME	"fw-vsc-cfg"
@@ -281,8 +280,14 @@ vsc_read_err:
 		*clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
 }
 
-static int fdp_nci_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static const struct acpi_gpio_params power_gpios = { 0, 0, false };
+
+static const struct acpi_gpio_mapping acpi_fdp_gpios[] = {
+	{ "power-gpios", &power_gpios, 1 },
+	{},
+};
+
+static int fdp_nci_i2c_probe(struct i2c_client *client)
 {
 	struct fdp_i2c_phy *phy;
 	struct device *dev = &client->dev;
@@ -304,8 +309,7 @@ static int fdp_nci_i2c_probe(struct i2c_client *client,
 		return -ENODEV;
 	}
 
-	phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy),
-			   GFP_KERNEL);
+	phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy), GFP_KERNEL);
 	if (!phy)
 		return -ENOMEM;
 
@@ -313,19 +317,22 @@ static int fdp_nci_i2c_probe(struct i2c_client *client,
 	phy->next_read_size = FDP_NCI_I2C_MIN_PAYLOAD;
 	i2c_set_clientdata(client, phy);
 
-	r = request_threaded_irq(client->irq, NULL, fdp_nci_i2c_irq_thread_fn,
-				 IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-				 FDP_I2C_DRIVER_NAME, phy);
+	r = devm_request_threaded_irq(dev, client->irq,
+				      NULL, fdp_nci_i2c_irq_thread_fn,
+				      IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+				      FDP_I2C_DRIVER_NAME, phy);
 
 	if (r < 0) {
 		nfc_err(&client->dev, "Unable to register IRQ handler\n");
 		return r;
 	}
 
-	/* Requesting the power gpio */
-	phy->power_gpio = devm_gpiod_get(dev, FDP_DP_POWER_GPIO_NAME,
-					 GPIOD_OUT_LOW);
+	r = devm_acpi_dev_add_driver_gpios(dev, acpi_fdp_gpios);
+	if (r)
+		dev_dbg(dev, "Unable to add GPIO mapping table\n");
 
+	/* Requesting the power gpio */
+	phy->power_gpio = devm_gpiod_get(dev, "power", GPIOD_OUT_LOW);
 	if (IS_ERR(phy->power_gpio)) {
 		nfc_err(dev, "Power GPIO request failed\n");
 		return PTR_ERR(phy->power_gpio);
@@ -360,12 +367,6 @@ static int fdp_nci_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-static struct i2c_device_id fdp_nci_i2c_id_table[] = {
-	{"int339a", 0},
-	{}
-};
-MODULE_DEVICE_TABLE(i2c, fdp_nci_i2c_id_table);
-
 static const struct acpi_device_id fdp_nci_i2c_acpi_match[] = {
 	{"INT339A", 0},
 	{}
@@ -377,8 +378,7 @@ static struct i2c_driver fdp_nci_i2c_driver = {
 		   .name = FDP_I2C_DRIVER_NAME,
 		   .acpi_match_table = ACPI_PTR(fdp_nci_i2c_acpi_match),
 		  },
-	.id_table = fdp_nci_i2c_id_table,
-	.probe = fdp_nci_i2c_probe,
+	.probe_new = fdp_nci_i2c_probe,
 	.remove = fdp_nci_i2c_remove,
 };
 module_i2c_driver(fdp_nci_i2c_driver);
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index f9f000c546d1..7f8960a46aab 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -457,7 +457,7 @@ int	nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv)
 
 	INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work);
 	snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq",
-		 dev_name(priv->dev));
+		 dev_name(&priv->ndev->nfc_dev->dev));
 	priv->fw_dnld.rx_wq = create_singlethread_workqueue(name);
 	if (!priv->fw_dnld.rx_wq)
 		return -ENOMEM;
@@ -494,6 +494,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name)
 {
 	struct nfcmrvl_private *priv = nci_get_drvdata(ndev);
 	struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld;
+	int res;
 
 	if (!priv->support_fw_dnld)
 		return -ENOTSUPP;
@@ -509,7 +510,9 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name)
 	 */
 
 	/* Retrieve FW binary */
-	if (request_firmware(&fw_dnld->fw, firmware_name, priv->dev) < 0) {
+	res = request_firmware(&fw_dnld->fw, firmware_name,
+			       &ndev->nfc_dev->dev);
+	if (res < 0) {
 		nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name);
 		return -ENOENT;
 	}
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index c5038e6447bd..e65d027b91fa 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -123,13 +123,14 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
 
 	memcpy(&priv->config, pdata, sizeof(*pdata));
 
-	if (priv->config.reset_n_io) {
-		rc = devm_gpio_request_one(dev,
-					   priv->config.reset_n_io,
-					   GPIOF_OUT_INIT_LOW,
-					   "nfcmrvl_reset_n");
-		if (rc < 0)
+	if (gpio_is_valid(priv->config.reset_n_io)) {
+		rc = gpio_request_one(priv->config.reset_n_io,
+				      GPIOF_OUT_INIT_LOW,
+				      "nfcmrvl_reset_n");
+		if (rc < 0) {
+			priv->config.reset_n_io = -EINVAL;
 			nfc_err(dev, "failed to request reset_n io\n");
+		}
 	}
 
 	if (phy == NFCMRVL_PHY_SPI) {
@@ -154,7 +155,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
 	if (!priv->ndev) {
 		nfc_err(dev, "nci_allocate_device failed\n");
 		rc = -ENOMEM;
-		goto error;
+		goto error_free_gpio;
+	}
+
+	rc = nfcmrvl_fw_dnld_init(priv);
+	if (rc) {
+		nfc_err(dev, "failed to initialize FW download %d\n", rc);
+		goto error_free_dev;
 	}
 
 	nci_set_drvdata(priv->ndev, priv);
@@ -162,24 +169,22 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
 	rc = nci_register_device(priv->ndev);
 	if (rc) {
 		nfc_err(dev, "nci_register_device failed %d\n", rc);
-		goto error_free_dev;
+		goto error_fw_dnld_deinit;
 	}
 
 	/* Ensure that controller is powered off */
 	nfcmrvl_chip_halt(priv);
 
-	rc = nfcmrvl_fw_dnld_init(priv);
-	if (rc) {
-		nfc_err(dev, "failed to initialize FW download %d\n", rc);
-		goto error_free_dev;
-	}
-
 	nfc_info(dev, "registered with nci successfully\n");
 	return priv;
 
+error_fw_dnld_deinit:
+	nfcmrvl_fw_dnld_deinit(priv);
 error_free_dev:
 	nci_free_device(priv->ndev);
-error:
+error_free_gpio:
+	if (gpio_is_valid(priv->config.reset_n_io))
+		gpio_free(priv->config.reset_n_io);
 	kfree(priv);
 	return ERR_PTR(rc);
 }
@@ -194,8 +199,8 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
 
 	nfcmrvl_fw_dnld_deinit(priv);
 
-	if (priv->config.reset_n_io)
-		devm_gpio_free(priv->dev, priv->config.reset_n_io);
+	if (gpio_is_valid(priv->config.reset_n_io))
+		gpio_free(priv->config.reset_n_io);
 
 	nci_unregister_device(ndev);
 	nci_free_device(ndev);
@@ -262,7 +267,6 @@ int nfcmrvl_parse_dt(struct device_node *node,
 	reset_n_io = of_get_named_gpio(node, "reset-n-io", 0);
 	if (reset_n_io < 0) {
 		pr_info("no reset-n-io config\n");
-		reset_n_io = 0;
 	} else if (!gpio_is_valid(reset_n_io)) {
 		pr_err("invalid reset-n-io GPIO\n");
 		return reset_n_io;
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 83a99e38e7bd..91162f8e0366 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -84,6 +84,7 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
 	ret = nfcmrvl_parse_dt(matched_node, pdata);
 	if (ret < 0) {
 		pr_err("Failed to get generic entries\n");
+		of_node_put(matched_node);
 		return ret;
 	}
 
@@ -97,6 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
 	else
 		pdata->break_control = 0;
 
+	of_node_put(matched_node);
+
 	return 0;
 }
 
@@ -109,6 +112,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
 	struct nfcmrvl_private *priv;
 	struct nfcmrvl_platform_data *pdata = NULL;
 	struct nfcmrvl_platform_data config;
+	struct device *dev = nu->tty->dev;
 
 	/*
 	 * Platform data cannot be used here since usually it is already used
@@ -116,9 +120,8 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
 	 * and check if DT entries were added.
 	 */
 
-	if (nu->tty->dev->parent && nu->tty->dev->parent->of_node)
-		if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node,
-					  &config) == 0)
+	if (dev && dev->parent && dev->parent->of_node)
+		if (nfcmrvl_uart_parse_dt(dev->parent->of_node, &config) == 0)
 			pdata = &config;
 
 	if (!pdata) {
@@ -131,7 +134,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
 	}
 
 	priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_UART, nu, &uart_ops,
-					nu->tty->dev, pdata);
+					dev, pdata);
 	if (IS_ERR(priv))
 		return PTR_ERR(priv);
 
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 699aa9d16575..bd35eab652be 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -341,15 +341,13 @@ static int nfcmrvl_probe(struct usb_interface *intf,
 	init_usb_anchor(&drv_data->deferred);
 
 	priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_USB, drv_data, &usb_ops,
-					&drv_data->udev->dev, &config);
+					&intf->dev, &config);
 	if (IS_ERR(priv))
 		return PTR_ERR(priv);
 
 	drv_data->priv = priv;
 	drv_data->priv->support_fw_dnld = false;
 
-	priv->dev = &drv_data->udev->dev;
-
 	usb_set_intfdata(intf, drv_data);
 
 	return 0;
diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c
index a466e7978466..33449820e754 100644
--- a/drivers/nfc/nfcsim.c
+++ b/drivers/nfc/nfcsim.c
@@ -482,8 +482,10 @@ static int __init nfcsim_init(void)
 exit_err:
 	pr_err("Failed to initialize nfcsim driver (%d)\n", rc);
 
-	nfcsim_link_free(link0);
-	nfcsim_link_free(link1);
+	if (link0)
+		nfcsim_link_free(link0);
+	if (link1)
+		nfcsim_link_free(link1);
 
 	return rc;
 }
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index fedde9d46ab6..4b14740edb67 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -904,7 +904,7 @@ static int pn544_hci_i2c_probe(struct i2c_client *client,
 	phy->i2c_dev = client;
 	i2c_set_clientdata(client, phy);
 
-	r = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), acpi_pn544_gpios);
+	r = devm_acpi_dev_add_driver_gpios(dev, acpi_pn544_gpios);
 	if (r)
 		dev_dbg(dev, "Unable to add GPIO mapping table\n");
 
@@ -958,7 +958,6 @@ static int pn544_hci_i2c_remove(struct i2c_client *client)
 	if (phy->powered)
 		pn544_hci_i2c_disable(phy);
 
-	acpi_dev_remove_driver_gpios(ACPI_COMPANION(&client->dev));
 	return 0;
 }
 
diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 9dfae0efa922..515f08d037fb 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -19,15 +19,12 @@
 
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <linux/acpi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/nfc.h>
-#include <linux/platform_data/st-nci.h>
+#include <linux/of.h>
 
 #include "st-nci.h"
 
@@ -40,18 +37,16 @@
 #define ST_NCI_I2C_MIN_SIZE 4   /* PCB(1) + NCI Packet header(3) */
 #define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */
 
+#define ST_NCI_DRIVER_NAME "st_nci"
 #define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c"
 
-#define ST_NCI_GPIO_NAME_RESET "reset"
-
 struct st_nci_i2c_phy {
 	struct i2c_client *i2c_dev;
 	struct llt_ndlc *ndlc;
 
 	bool irq_active;
 
-	unsigned int gpio_reset;
-	unsigned int irq_polarity;
+	struct gpio_desc *gpiod_reset;
 
 	struct st_nci_se_status se_status;
 };
@@ -60,9 +55,9 @@ static int st_nci_i2c_enable(void *phy_id)
 {
 	struct st_nci_i2c_phy *phy = phy_id;
 
-	gpio_set_value(phy->gpio_reset, 0);
+	gpiod_set_value(phy->gpiod_reset, 0);
 	usleep_range(10000, 15000);
-	gpio_set_value(phy->gpio_reset, 1);
+	gpiod_set_value(phy->gpiod_reset, 1);
 	usleep_range(80000, 85000);
 
 	if (phy->ndlc->powered == 0 && phy->irq_active == 0) {
@@ -208,114 +203,18 @@ static struct nfc_phy_ops i2c_phy_ops = {
 	.disable = st_nci_i2c_disable,
 };
 
-static int st_nci_i2c_acpi_request_resources(struct i2c_client *client)
-{
-	struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-	struct gpio_desc *gpiod_reset;
-	struct device *dev = &client->dev;
-	u8 tmp;
-
-	/* Get RESET GPIO from ACPI */
-	gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1,
-					   GPIOD_OUT_HIGH);
-	if (IS_ERR(gpiod_reset)) {
-		nfc_err(dev, "Unable to get RESET GPIO\n");
-		return -ENODEV;
-	}
-
-	phy->gpio_reset = desc_to_gpio(gpiod_reset);
-
-	phy->irq_polarity = irq_get_trigger_type(client->irq);
-
-	phy->se_status.is_ese_present = false;
-	phy->se_status.is_uicc_present = false;
-
-	if (device_property_present(dev, "ese-present")) {
-		device_property_read_u8(dev, "ese-present", &tmp);
-		phy->se_status.is_ese_present = tmp;
-	}
-
-	if (device_property_present(dev, "uicc-present")) {
-		device_property_read_u8(dev, "uicc-present", &tmp);
-		phy->se_status.is_uicc_present = tmp;
-	}
-
-	return 0;
-}
-
-static int st_nci_i2c_of_request_resources(struct i2c_client *client)
-{
-	struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-	struct device_node *pp;
-	int gpio;
-	int r;
-
-	pp = client->dev.of_node;
-	if (!pp)
-		return -ENODEV;
-
-	/* Get GPIO from device tree */
-	gpio = of_get_named_gpio(pp, "reset-gpios", 0);
-	if (gpio < 0) {
-		nfc_err(&client->dev,
-			"Failed to retrieve reset-gpios from device tree\n");
-		return gpio;
-	}
-
-	/* GPIO request and configuration */
-	r = devm_gpio_request_one(&client->dev, gpio,
-				GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET);
-	if (r) {
-		nfc_err(&client->dev, "Failed to request reset pin\n");
-		return r;
-	}
-	phy->gpio_reset = gpio;
-
-	phy->irq_polarity = irq_get_trigger_type(client->irq);
-
-	phy->se_status.is_ese_present =
-				of_property_read_bool(pp, "ese-present");
-	phy->se_status.is_uicc_present =
-				of_property_read_bool(pp, "uicc-present");
-
-	return 0;
-}
-
-static int st_nci_i2c_request_resources(struct i2c_client *client)
-{
-	struct st_nci_nfc_platform_data *pdata;
-	struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-	int r;
-
-	pdata = client->dev.platform_data;
-	if (pdata == NULL) {
-		nfc_err(&client->dev, "No platform data\n");
-		return -EINVAL;
-	}
+static const struct acpi_gpio_params reset_gpios = { 1, 0, false };
 
-	/* store for later use */
-	phy->gpio_reset = pdata->gpio_reset;
-	phy->irq_polarity = pdata->irq_polarity;
-
-	r = devm_gpio_request_one(&client->dev,
-			phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
-			ST_NCI_GPIO_NAME_RESET);
-	if (r) {
-		pr_err("%s : reset gpio_request failed\n", __FILE__);
-		return r;
-	}
-
-	phy->se_status.is_ese_present = pdata->is_ese_present;
-	phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
-	return 0;
-}
+static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = {
+	{ "reset-gpios", &reset_gpios, 1 },
+	{},
+};
 
 static int st_nci_i2c_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
+	struct device *dev = &client->dev;
 	struct st_nci_i2c_phy *phy;
-	struct st_nci_nfc_platform_data *pdata;
 	int r;
 
 	dev_dbg(&client->dev, "%s\n", __func__);
@@ -326,8 +225,7 @@ static int st_nci_i2c_probe(struct i2c_client *client,
 		return -ENODEV;
 	}
 
-	phy = devm_kzalloc(&client->dev, sizeof(struct st_nci_i2c_phy),
-			   GFP_KERNEL);
+	phy = devm_kzalloc(dev, sizeof(struct st_nci_i2c_phy), GFP_KERNEL);
 	if (!phy)
 		return -ENOMEM;
 
@@ -335,32 +233,22 @@ static int st_nci_i2c_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, phy);
 
-	pdata = client->dev.platform_data;
-	if (!pdata && client->dev.of_node) {
-		r = st_nci_i2c_of_request_resources(client);
-		if (r) {
-			nfc_err(&client->dev, "No platform data\n");
-			return r;
-		}
-	} else if (pdata) {
-		r = st_nci_i2c_request_resources(client);
-		if (r) {
-			nfc_err(&client->dev,
-				"Cannot get platform resources\n");
-			return r;
-		}
-	} else if (ACPI_HANDLE(&client->dev)) {
-		r = st_nci_i2c_acpi_request_resources(client);
-		if (r) {
-			nfc_err(&client->dev, "Cannot get ACPI data\n");
-			return r;
-		}
-	} else {
-		nfc_err(&client->dev,
-			"st_nci platform resources not available\n");
+	r = devm_acpi_dev_add_driver_gpios(dev, acpi_st_nci_gpios);
+	if (r)
+		dev_dbg(dev, "Unable to add GPIO mapping table\n");
+
+	/* Get RESET GPIO */
+	phy->gpiod_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(phy->gpiod_reset)) {
+		nfc_err(dev, "Unable to get RESET GPIO\n");
 		return -ENODEV;
 	}
 
+	phy->se_status.is_ese_present =
+				device_property_read_bool(dev, "ese-present");
+	phy->se_status.is_uicc_present =
+				device_property_read_bool(dev, "uicc-present");
+
 	r = ndlc_probe(phy, &i2c_phy_ops, &client->dev,
 			ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM,
 			&phy->ndlc, &phy->se_status);
@@ -372,7 +260,7 @@ static int st_nci_i2c_probe(struct i2c_client *client,
 	phy->irq_active = true;
 	r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
 				st_nci_irq_thread_fn,
-				phy->irq_polarity | IRQF_ONESHOT,
+				IRQF_ONESHOT,
 				ST_NCI_DRIVER_NAME, phy);
 	if (r < 0)
 		nfc_err(&client->dev, "Unable to register IRQ handler\n");
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 89e341eba3eb..14705591b0fb 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -19,16 +19,13 @@
 
 #include <linux/module.h>
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <linux/acpi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/nfc.h>
+#include <linux/of.h>
 #include <net/nfc/nci.h>
-#include <linux/platform_data/st-nci.h>
 
 #include "st-nci.h"
 
@@ -41,18 +38,16 @@
 #define ST_NCI_SPI_MIN_SIZE 4   /* PCB(1) + NCI Packet header(3) */
 #define ST_NCI_SPI_MAX_SIZE 250 /* req 4.2.1 */
 
+#define ST_NCI_DRIVER_NAME "st_nci"
 #define ST_NCI_SPI_DRIVER_NAME "st_nci_spi"
 
-#define ST_NCI_GPIO_NAME_RESET "reset"
-
 struct st_nci_spi_phy {
 	struct spi_device *spi_dev;
 	struct llt_ndlc *ndlc;
 
 	bool irq_active;
 
-	unsigned int gpio_reset;
-	unsigned int irq_polarity;
+	struct gpio_desc *gpiod_reset;
 
 	struct st_nci_se_status se_status;
 };
@@ -61,9 +56,9 @@ static int st_nci_spi_enable(void *phy_id)
 {
 	struct st_nci_spi_phy *phy = phy_id;
 
-	gpio_set_value(phy->gpio_reset, 0);
+	gpiod_set_value(phy->gpiod_reset, 0);
 	usleep_range(10000, 15000);
-	gpio_set_value(phy->gpio_reset, 1);
+	gpiod_set_value(phy->gpiod_reset, 1);
 	usleep_range(80000, 85000);
 
 	if (phy->ndlc->powered == 0 && phy->irq_active == 0) {
@@ -223,113 +218,16 @@ static struct nfc_phy_ops spi_phy_ops = {
 	.disable = st_nci_spi_disable,
 };
 
-static int st_nci_spi_acpi_request_resources(struct spi_device *spi_dev)
-{
-	struct st_nci_spi_phy *phy = spi_get_drvdata(spi_dev);
-	struct gpio_desc *gpiod_reset;
-	struct device *dev = &spi_dev->dev;
-	u8 tmp;
-
-	/* Get RESET GPIO from ACPI */
-	gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1,
-					   GPIOD_OUT_HIGH);
-	if (IS_ERR(gpiod_reset)) {
-		nfc_err(dev, "Unable to get RESET GPIO\n");
-		return -ENODEV;
-	}
-
-	phy->gpio_reset = desc_to_gpio(gpiod_reset);
-
-	phy->irq_polarity = irq_get_trigger_type(spi_dev->irq);
-
-	phy->se_status.is_ese_present = false;
-	phy->se_status.is_uicc_present = false;
-
-	if (device_property_present(dev, "ese-present")) {
-		device_property_read_u8(dev, "ese-present", &tmp);
-		tmp = phy->se_status.is_ese_present;
-	}
-
-	if (device_property_present(dev, "uicc-present")) {
-		device_property_read_u8(dev, "uicc-present", &tmp);
-		tmp = phy->se_status.is_uicc_present;
-	}
-
-	return 0;
-}
-
-static int st_nci_spi_of_request_resources(struct spi_device *dev)
-{
-	struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
-	struct device_node *pp;
-	int gpio;
-	int r;
-
-	pp = dev->dev.of_node;
-	if (!pp)
-		return -ENODEV;
-
-	/* Get GPIO from device tree */
-	gpio = of_get_named_gpio(pp, "reset-gpios", 0);
-	if (gpio < 0) {
-		nfc_err(&dev->dev,
-			"Failed to retrieve reset-gpios from device tree\n");
-		return gpio;
-	}
-
-	/* GPIO request and configuration */
-	r = devm_gpio_request_one(&dev->dev, gpio,
-				GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET);
-	if (r) {
-		nfc_err(&dev->dev, "Failed to request reset pin\n");
-		return r;
-	}
-	phy->gpio_reset = gpio;
-
-	phy->irq_polarity = irq_get_trigger_type(dev->irq);
+static const struct acpi_gpio_params reset_gpios = { 1, 0, false };
 
-	phy->se_status.is_ese_present =
-				of_property_read_bool(pp, "ese-present");
-	phy->se_status.is_uicc_present =
-				of_property_read_bool(pp, "uicc-present");
-
-	return 0;
-}
-
-static int st_nci_spi_request_resources(struct spi_device *dev)
-{
-	struct st_nci_nfc_platform_data *pdata;
-	struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
-	int r;
-
-	pdata = dev->dev.platform_data;
-	if (pdata == NULL) {
-		nfc_err(&dev->dev, "No platform data\n");
-		return -EINVAL;
-	}
-
-	/* store for later use */
-	phy->gpio_reset = pdata->gpio_reset;
-	phy->irq_polarity = pdata->irq_polarity;
-
-	r = devm_gpio_request_one(&dev->dev,
-			phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
-			ST_NCI_GPIO_NAME_RESET);
-	if (r) {
-		pr_err("%s : reset gpio_request failed\n", __FILE__);
-		return r;
-	}
-
-	phy->se_status.is_ese_present = pdata->is_ese_present;
-	phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
-	return 0;
-}
+static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = {
+	{ "reset-gpios", &reset_gpios, 1 },
+	{},
+};
 
 static int st_nci_spi_probe(struct spi_device *dev)
 {
 	struct st_nci_spi_phy *phy;
-	struct st_nci_nfc_platform_data *pdata;
 	int r;
 
 	dev_dbg(&dev->dev, "%s\n", __func__);
@@ -351,32 +249,22 @@ static int st_nci_spi_probe(struct spi_device *dev)
 
 	spi_set_drvdata(dev, phy);
 
-	pdata = dev->dev.platform_data;
-	if (!pdata && dev->dev.of_node) {
-		r = st_nci_spi_of_request_resources(dev);
-		if (r) {
-			nfc_err(&dev->dev, "No platform data\n");
-			return r;
-		}
-	} else if (pdata) {
-		r = st_nci_spi_request_resources(dev);
-		if (r) {
-			nfc_err(&dev->dev,
-				"Cannot get platform resources\n");
-			return r;
-		}
-	} else if (ACPI_HANDLE(&dev->dev)) {
-		r = st_nci_spi_acpi_request_resources(dev);
-		if (r) {
-			nfc_err(&dev->dev, "Cannot get ACPI data\n");
-			return r;
-		}
-	} else {
-		nfc_err(&dev->dev,
-			"st_nci platform resources not available\n");
-		return -ENODEV;
+	r = devm_acpi_dev_add_driver_gpios(&dev->dev, acpi_st_nci_gpios);
+	if (r)
+		dev_dbg(&dev->dev, "Unable to add GPIO mapping table\n");
+
+	/* Get RESET GPIO */
+	phy->gpiod_reset = devm_gpiod_get(&dev->dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(phy->gpiod_reset)) {
+		nfc_err(&dev->dev, "Unable to get RESET GPIO\n");
+		return PTR_ERR(phy->gpiod_reset);
 	}
 
+	phy->se_status.is_ese_present =
+			device_property_read_bool(&dev->dev, "ese-present");
+	phy->se_status.is_uicc_present =
+			device_property_read_bool(&dev->dev, "uicc-present");
+
 	r = ndlc_probe(phy, &spi_phy_ops, &dev->dev,
 			ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM,
 			&phy->ndlc, &phy->se_status);
@@ -388,7 +276,7 @@ static int st_nci_spi_probe(struct spi_device *dev)
 	phy->irq_active = true;
 	r = devm_request_threaded_irq(&dev->dev, dev->irq, NULL,
 				st_nci_irq_thread_fn,
-				phy->irq_polarity | IRQF_ONESHOT,
+				IRQF_ONESHOT,
 				ST_NCI_SPI_DRIVER_NAME, phy);
 	if (r < 0)
 		nfc_err(&dev->dev, "Unable to register IRQ handler\n");
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 4bff76baa341..cd1f7bfa75eb 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -61,8 +61,6 @@
 #define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci"
 #define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c"
 
-#define ST21NFCA_GPIO_NAME_EN "enable"
-
 struct st21nfca_i2c_phy {
 	struct i2c_client *i2c_dev;
 	struct nfc_hci_dev *hdev;
@@ -501,41 +499,17 @@ static struct nfc_phy_ops i2c_phy_ops = {
 	.disable = st21nfca_hci_i2c_disable,
 };
 
-static int st21nfca_hci_i2c_acpi_request_resources(struct i2c_client *client)
-{
-	struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
-	struct device *dev = &client->dev;
-
-	/* Get EN GPIO from ACPI */
-	phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 1,
-					      GPIOD_OUT_LOW);
-	if (IS_ERR(phy->gpiod_ena)) {
-		nfc_err(dev, "Unable to get ENABLE GPIO\n");
-		return PTR_ERR(phy->gpiod_ena);
-	}
-
-	return 0;
-}
-
-static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client)
-{
-	struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
-	struct device *dev = &client->dev;
-
-	/* Get GPIO from device tree */
-	phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 0,
-					      GPIOD_OUT_HIGH);
-	if (IS_ERR(phy->gpiod_ena)) {
-		nfc_err(dev, "Failed to request enable pin\n");
-		return PTR_ERR(phy->gpiod_ena);
-	}
+static const struct acpi_gpio_params enable_gpios = { 1, 0, false };
 
-	return 0;
-}
+static const struct acpi_gpio_mapping acpi_st21nfca_gpios[] = {
+	{ "enable-gpios", &enable_gpios, 1 },
+	{},
+};
 
 static int st21nfca_hci_i2c_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
+	struct device *dev = &client->dev;
 	struct st21nfca_i2c_phy *phy;
 	int r;
 
@@ -562,21 +536,15 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
 	mutex_init(&phy->phy_lock);
 	i2c_set_clientdata(client, phy);
 
-	if (client->dev.of_node) {
-		r = st21nfca_hci_i2c_of_request_resources(client);
-		if (r) {
-			nfc_err(&client->dev, "No platform data\n");
-			return r;
-		}
-	} else if (ACPI_HANDLE(&client->dev)) {
-		r = st21nfca_hci_i2c_acpi_request_resources(client);
-		if (r) {
-			nfc_err(&client->dev, "Cannot get ACPI data\n");
-			return r;
-		}
-	} else {
-		nfc_err(&client->dev, "st21nfca platform resources not available\n");
-		return -ENODEV;
+	r = devm_acpi_dev_add_driver_gpios(dev, acpi_st21nfca_gpios);
+	if (r)
+		dev_dbg(dev, "Unable to add GPIO mapping table\n");
+
+	/* Get EN GPIO from resource provider */
+	phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
+	if (IS_ERR(phy->gpiod_ena)) {
+		nfc_err(dev, "Unable to get ENABLE GPIO\n");
+		return PTR_ERR(phy->gpiod_ena);
 	}
 
 	phy->se_status.is_ese_present =
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 2d1c8ca6e679..eee5cc1a9220 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -20,9 +20,8 @@
 #include <linux/nfc.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/regulator/consumer.h>
 
@@ -123,11 +122,10 @@
 		 NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_FELICA_MASK | \
 		 NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK)
 
-#define TRF7970A_AUTOSUSPEND_DELAY		30000 /* 30 seconds */
+#define TRF7970A_AUTOSUSPEND_DELAY		30000	/* 30 seconds */
 #define TRF7970A_13MHZ_CLOCK_FREQUENCY		13560000
 #define TRF7970A_27MHZ_CLOCK_FREQUENCY		27120000
 
-
 #define TRF7970A_RX_SKB_ALLOC_SIZE		256
 
 #define TRF7970A_FIFO_SIZE			127
@@ -152,7 +150,6 @@
  */
 #define TRF7970A_QUIRK_IRQ_STATUS_READ		BIT(0)
 #define TRF7970A_QUIRK_EN2_MUST_STAY_LOW	BIT(1)
-#define TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE	BIT(2)
 
 /* Direct commands */
 #define TRF7970A_CMD_IDLE			0x00
@@ -295,7 +292,7 @@
 #define TRF7970A_REG_IO_CTRL_AUTO_REG		BIT(7)
 
 /* IRQ Status Register Bits */
-#define TRF7970A_IRQ_STATUS_NORESP		BIT(0) /* ISO15693 only */
+#define TRF7970A_IRQ_STATUS_NORESP		BIT(0)	/* ISO15693 only */
 #define TRF7970A_IRQ_STATUS_NFC_COL_ERROR	BIT(0)
 #define TRF7970A_IRQ_STATUS_COL			BIT(1)
 #define TRF7970A_IRQ_STATUS_FRAMING_EOF_ERROR	BIT(2)
@@ -451,16 +448,14 @@ struct trf7970a {
 	u8				md_rf_tech;
 	u8				tx_cmd;
 	bool				issue_eof;
-	bool				adjust_resp_len;
-	int				en2_gpio;
-	int				en_gpio;
+	struct gpio_desc		*en_gpiod;
+	struct gpio_desc		*en2_gpiod;
 	struct mutex			lock;
 	unsigned int			timeout;
 	bool				ignore_timeout;
 	struct delayed_work		timeout_work;
 };
 
-
 static int trf7970a_cmd(struct trf7970a *trf, u8 opcode)
 {
 	u8 cmd = TRF7970A_CMD_BIT_CTRL | TRF7970A_CMD_BIT_OPCODE(opcode);
@@ -471,7 +466,7 @@ static int trf7970a_cmd(struct trf7970a *trf, u8 opcode)
 	ret = spi_write(trf->spi, &cmd, 1);
 	if (ret)
 		dev_err(trf->dev, "%s - cmd: 0x%x, ret: %d\n", __func__, cmd,
-				ret);
+			ret);
 	return ret;
 }
 
@@ -483,14 +478,15 @@ static int trf7970a_read(struct trf7970a *trf, u8 reg, u8 *val)
 	ret = spi_write_then_read(trf->spi, &addr, 1, val, 1);
 	if (ret)
 		dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr,
-				ret);
+			ret);
 
 	dev_dbg(trf->dev, "read(0x%x): 0x%x\n", addr, *val);
 
 	return ret;
 }
 
-static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len)
+static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf,
+			      size_t len)
 {
 	u8 addr = reg | TRF7970A_CMD_BIT_RW | TRF7970A_CMD_BIT_CONTINUOUS;
 	struct spi_transfer t[2];
@@ -514,7 +510,7 @@ static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len)
 	ret = spi_sync(trf->spi, &m);
 	if (ret)
 		dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr,
-				ret);
+			ret);
 	return ret;
 }
 
@@ -528,7 +524,7 @@ static int trf7970a_write(struct trf7970a *trf, u8 reg, u8 val)
 	ret = spi_write(trf->spi, buf, 2);
 	if (ret)
 		dev_err(trf->dev, "%s - write: 0x%x 0x%x, ret: %d\n", __func__,
-				buf[0], buf[1], ret);
+			buf[0], buf[1], ret);
 
 	return ret;
 }
@@ -550,7 +546,7 @@ static int trf7970a_read_irqstatus(struct trf7970a *trf, u8 *status)
 
 	if (ret)
 		dev_err(trf->dev, "%s - irqstatus: Status read failed: %d\n",
-				__func__, ret);
+			__func__, ret);
 	else
 		*status = buf[0];
 
@@ -564,12 +560,12 @@ static int trf7970a_read_target_proto(struct trf7970a *trf, u8 *target_proto)
 	u8 addr;
 
 	addr = TRF79070A_NFC_TARGET_PROTOCOL | TRF7970A_CMD_BIT_RW |
-		TRF7970A_CMD_BIT_CONTINUOUS;
+	       TRF7970A_CMD_BIT_CONTINUOUS;
 
 	ret = spi_write_then_read(trf->spi, &addr, 1, buf, 2);
 	if (ret)
 		dev_err(trf->dev, "%s - target_proto: Read failed: %d\n",
-				__func__, ret);
+			__func__, ret);
 	else
 		*target_proto = buf[0];
 
@@ -600,7 +596,7 @@ static int trf7970a_mode_detect(struct trf7970a *trf, u8 *rf_tech)
 		break;
 	default:
 		dev_dbg(trf->dev, "%s - mode_detect: target_proto: 0x%x\n",
-				__func__, target_proto);
+			__func__, target_proto);
 		return -EIO;
 	}
 
@@ -616,8 +612,8 @@ static void trf7970a_send_upstream(struct trf7970a *trf)
 
 	if (trf->rx_skb && !IS_ERR(trf->rx_skb) && !trf->aborting)
 		print_hex_dump_debug("trf7970a rx data: ", DUMP_PREFIX_NONE,
-				16, 1, trf->rx_skb->data, trf->rx_skb->len,
-				false);
+				     16, 1, trf->rx_skb->data, trf->rx_skb->len,
+				     false);
 
 	trf->state = TRF7970A_ST_IDLE;
 
@@ -632,13 +628,6 @@ static void trf7970a_send_upstream(struct trf7970a *trf)
 		trf->aborting = false;
 	}
 
-	if (trf->adjust_resp_len) {
-		if (trf->rx_skb)
-			skb_trim(trf->rx_skb, trf->rx_skb->len - 1);
-
-		trf->adjust_resp_len = false;
-	}
-
 	trf->cb(trf->ddev, trf->cb_arg, trf->rx_skb);
 
 	trf->rx_skb = NULL;
@@ -657,7 +646,8 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno)
 }
 
 static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
-		unsigned int len, u8 *prefix, unsigned int prefix_len)
+			     unsigned int len, u8 *prefix,
+			     unsigned int prefix_len)
 {
 	struct spi_transfer t[2];
 	struct spi_message m;
@@ -665,7 +655,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
 	int ret;
 
 	print_hex_dump_debug("trf7970a tx data: ", DUMP_PREFIX_NONE,
-			16, 1, skb->data, len, false);
+			     16, 1, skb->data, len, false);
 
 	spi_message_init(&m);
 
@@ -682,7 +672,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
 	ret = spi_sync(trf->spi, &m);
 	if (ret) {
 		dev_err(trf->dev, "%s - Can't send tx data: %d\n", __func__,
-				ret);
+			ret);
 		return ret;
 	}
 
@@ -706,7 +696,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
 	}
 
 	dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n", timeout,
-			trf->state);
+		trf->state);
 
 	schedule_delayed_work(&trf->timeout_work, msecs_to_jiffies(timeout));
 
@@ -774,9 +764,9 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
 
 	if (fifo_bytes > skb_tailroom(skb)) {
 		skb = skb_copy_expand(skb, skb_headroom(skb),
-				max_t(int, fifo_bytes,
-					TRF7970A_RX_SKB_ALLOC_SIZE),
-				GFP_KERNEL);
+				      max_t(int, fifo_bytes,
+					    TRF7970A_RX_SKB_ALLOC_SIZE),
+				      GFP_KERNEL);
 		if (!skb) {
 			trf7970a_send_err_upstream(trf, -ENOMEM);
 			return;
@@ -787,7 +777,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
 	}
 
 	ret = trf7970a_read_cont(trf, TRF7970A_FIFO_IO_REGISTER,
-			skb_put(skb, fifo_bytes), fifo_bytes);
+				 skb_put(skb, fifo_bytes), fifo_bytes);
 	if (ret) {
 		trf7970a_send_err_upstream(trf, ret);
 		return;
@@ -795,8 +785,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
 
 	/* If received Type 2 ACK/NACK, shift right 4 bits and pass up */
 	if ((trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T) && (skb->len == 1) &&
-			(trf->special_fcn_reg1 ==
-				 TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) {
+	    (trf->special_fcn_reg1 == TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) {
 		skb->data[0] >>= 4;
 		status = TRF7970A_IRQ_STATUS_SRX;
 	} else {
@@ -819,16 +808,16 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
 	}
 
 no_rx_data:
-	if (status == TRF7970A_IRQ_STATUS_SRX) { /* Receive complete */
+	if (status == TRF7970A_IRQ_STATUS_SRX) {	/* Receive complete */
 		trf7970a_send_upstream(trf);
 		return;
 	}
 
 	dev_dbg(trf->dev, "Setting timeout for %d ms\n",
-			TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT);
+		TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT);
 
 	schedule_delayed_work(&trf->timeout_work,
-			msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT));
+			   msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT));
 }
 
 static irqreturn_t trf7970a_irq(int irq, void *dev_id)
@@ -851,7 +840,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 	}
 
 	dev_dbg(trf->dev, "IRQ - state: %d, status: 0x%x\n", trf->state,
-			status);
+		status);
 
 	if (!status) {
 		mutex_unlock(&trf->lock);
@@ -876,7 +865,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 	case TRF7970A_ST_WAIT_FOR_TX_FIFO:
 		if (status & TRF7970A_IRQ_STATUS_TX) {
 			trf->ignore_timeout =
-				!cancel_delayed_work(&trf->timeout_work);
+			    !cancel_delayed_work(&trf->timeout_work);
 			trf7970a_fill_fifo(trf);
 		} else {
 			trf7970a_send_err_upstream(trf, -EIO);
@@ -886,11 +875,11 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 	case TRF7970A_ST_WAIT_FOR_RX_DATA_CONT:
 		if (status & TRF7970A_IRQ_STATUS_SRX) {
 			trf->ignore_timeout =
-				!cancel_delayed_work(&trf->timeout_work);
+			    !cancel_delayed_work(&trf->timeout_work);
 			trf7970a_drain_fifo(trf, status);
 		} else if (status & TRF7970A_IRQ_STATUS_FIFO) {
 			ret = trf7970a_read(trf, TRF7970A_FIFO_STATUS,
-					&fifo_bytes);
+					    &fifo_bytes);
 
 			fifo_bytes &= ~TRF7970A_FIFO_STATUS_OVERFLOW;
 
@@ -899,14 +888,14 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 			else if (!fifo_bytes)
 				trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
 		} else if ((status == TRF7970A_IRQ_STATUS_TX) ||
-				(!trf->is_initiator &&
-				 (status == (TRF7970A_IRQ_STATUS_TX |
-					     TRF7970A_IRQ_STATUS_NFC_RF)))) {
+			   (!trf->is_initiator &&
+			    (status == (TRF7970A_IRQ_STATUS_TX |
+					TRF7970A_IRQ_STATUS_NFC_RF)))) {
 			trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
 
 			if (!trf->timeout) {
-				trf->ignore_timeout = !cancel_delayed_work(
-						&trf->timeout_work);
+				trf->ignore_timeout =
+				    !cancel_delayed_work(&trf->timeout_work);
 				trf->rx_skb = ERR_PTR(0);
 				trf7970a_send_upstream(trf);
 				break;
@@ -930,13 +919,13 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 				break;
 			case NFC_DIGITAL_FRAMING_NFCA_ANTICOL_COMPLETE:
 				ret = trf7970a_write(trf,
-					TRF7970A_SPECIAL_FCN_REG1,
-					TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL);
+					 TRF7970A_SPECIAL_FCN_REG1,
+					 TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL);
 				if (ret)
 					goto err_unlock_exit;
 
 				trf->special_fcn_reg1 =
-					TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL;
+				    TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL;
 				break;
 			default:
 				break;
@@ -944,7 +933,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 
 			if (iso_ctrl != trf->iso_ctrl) {
 				ret = trf7970a_write(trf, TRF7970A_ISO_CTRL,
-						iso_ctrl);
+						     iso_ctrl);
 				if (ret)
 					goto err_unlock_exit;
 
@@ -961,7 +950,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 	case TRF7970A_ST_LISTENING:
 		if (status & TRF7970A_IRQ_STATUS_SRX) {
 			trf->ignore_timeout =
-				!cancel_delayed_work(&trf->timeout_work);
+			    !cancel_delayed_work(&trf->timeout_work);
 			trf7970a_drain_fifo(trf, status);
 		} else if (!(status & TRF7970A_IRQ_STATUS_NFC_RF)) {
 			trf7970a_send_err_upstream(trf, -EIO);
@@ -970,7 +959,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 	case TRF7970A_ST_LISTENING_MD:
 		if (status & TRF7970A_IRQ_STATUS_SRX) {
 			trf->ignore_timeout =
-				!cancel_delayed_work(&trf->timeout_work);
+			    !cancel_delayed_work(&trf->timeout_work);
 
 			ret = trf7970a_mode_detect(trf, &trf->md_rf_tech);
 			if (ret) {
@@ -985,7 +974,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
 		break;
 	default:
 		dev_err(trf->dev, "%s - Driver in invalid state: %d\n",
-				__func__, trf->state);
+			__func__, trf->state);
 	}
 
 err_unlock_exit:
@@ -1010,19 +999,19 @@ static void trf7970a_issue_eof(struct trf7970a *trf)
 	trf->state = TRF7970A_ST_WAIT_FOR_RX_DATA;
 
 	dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n",
-			trf->timeout, trf->state);
+		trf->timeout, trf->state);
 
 	schedule_delayed_work(&trf->timeout_work,
-			msecs_to_jiffies(trf->timeout));
+			      msecs_to_jiffies(trf->timeout));
 }
 
 static void trf7970a_timeout_work_handler(struct work_struct *work)
 {
 	struct trf7970a *trf = container_of(work, struct trf7970a,
-			timeout_work.work);
+					    timeout_work.work);
 
 	dev_dbg(trf->dev, "Timeout - state: %d, ignore_timeout: %d\n",
-			trf->state, trf->ignore_timeout);
+		trf->state, trf->ignore_timeout);
 
 	mutex_lock(&trf->lock);
 
@@ -1053,7 +1042,7 @@ static int trf7970a_init(struct trf7970a *trf)
 		goto err_out;
 
 	ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL,
-			trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
+			     trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
 	if (ret)
 		goto err_out;
 
@@ -1066,13 +1055,13 @@ static int trf7970a_init(struct trf7970a *trf)
 	trf->chip_status_ctrl &= ~TRF7970A_CHIP_STATUS_RF_ON;
 
 	ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-			trf->modulator_sys_clk_ctrl);
+			     trf->modulator_sys_clk_ctrl);
 	if (ret)
 		goto err_out;
 
 	ret = trf7970a_write(trf, TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS,
-			TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 |
-			TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32);
+			     TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 |
+			     TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32);
 	if (ret)
 		goto err_out;
 
@@ -1093,7 +1082,7 @@ err_out:
 static void trf7970a_switch_rf_off(struct trf7970a *trf)
 {
 	if ((trf->state == TRF7970A_ST_PWR_OFF) ||
-			(trf->state == TRF7970A_ST_RF_OFF))
+	    (trf->state == TRF7970A_ST_RF_OFF))
 		return;
 
 	dev_dbg(trf->dev, "Switching rf off\n");
@@ -1117,9 +1106,9 @@ static int trf7970a_switch_rf_on(struct trf7970a *trf)
 
 	pm_runtime_get_sync(trf->dev);
 
-	if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */
+	if (trf->state != TRF7970A_ST_RF_OFF) {	/* Power on, RF off */
 		dev_err(trf->dev, "%s - Incorrect state: %d\n", __func__,
-				trf->state);
+			trf->state);
 		return -EINVAL;
 	}
 
@@ -1154,7 +1143,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
 			break;
 		default:
 			dev_err(trf->dev, "%s - Invalid request: %d %d\n",
-					__func__, trf->state, on);
+				__func__, trf->state, on);
 			trf7970a_switch_rf_off(trf);
 			ret = -EINVAL;
 		}
@@ -1165,7 +1154,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
 			break;
 		default:
 			dev_err(trf->dev, "%s - Invalid request: %d %d\n",
-					__func__, trf->state, on);
+				__func__, trf->state, on);
 			ret = -EINVAL;
 			/* FALLTHROUGH */
 		case TRF7970A_ST_IDLE:
@@ -1190,36 +1179,36 @@ static int trf7970a_in_config_rf_tech(struct trf7970a *trf, int tech)
 	case NFC_DIGITAL_RF_TECH_106A:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443A_106;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_OOK;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_OOK;
 		trf->guard_time = TRF7970A_GUARD_TIME_NFCA;
 		break;
 	case NFC_DIGITAL_RF_TECH_106B:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443B_106;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_ASK10;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_ASK10;
 		trf->guard_time = TRF7970A_GUARD_TIME_NFCB;
 		break;
 	case NFC_DIGITAL_RF_TECH_212F:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_212;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_ASK10;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_ASK10;
 		trf->guard_time = TRF7970A_GUARD_TIME_NFCF;
 		break;
 	case NFC_DIGITAL_RF_TECH_424F:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_424;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_ASK10;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_ASK10;
 		trf->guard_time = TRF7970A_GUARD_TIME_NFCF;
 		break;
 	case NFC_DIGITAL_RF_TECH_ISO15693:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_OOK;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_OOK;
 		trf->guard_time = TRF7970A_GUARD_TIME_15693;
 		break;
 	default:
@@ -1246,7 +1235,8 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
 	u8 rssi;
 
 	ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-			trf->chip_status_ctrl | TRF7970A_CHIP_STATUS_REC_ON);
+			     trf->chip_status_ctrl |
+			     TRF7970A_CHIP_STATUS_REC_ON);
 	if (ret)
 		return ret;
 
@@ -1261,7 +1251,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
 		return ret;
 
 	ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-			trf->chip_status_ctrl);
+			     trf->chip_status_ctrl);
 	if (ret)
 		return ret;
 
@@ -1328,15 +1318,15 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing)
 		trf->iso_ctrl = iso_ctrl;
 
 		ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-				trf->modulator_sys_clk_ctrl);
+				     trf->modulator_sys_clk_ctrl);
 		if (ret)
 			return ret;
 	}
 
 	if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
 		ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-				trf->chip_status_ctrl |
-					TRF7970A_CHIP_STATUS_RF_ON);
+				     trf->chip_status_ctrl |
+				     TRF7970A_CHIP_STATUS_RF_ON);
 		if (ret)
 			return ret;
 
@@ -1349,7 +1339,7 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing)
 }
 
 static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
-		int param)
+				    int param)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 	int ret;
@@ -1361,7 +1351,7 @@ static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
 	trf->is_initiator = true;
 
 	if ((trf->state == TRF7970A_ST_PWR_OFF) ||
-			(trf->state == TRF7970A_ST_RF_OFF)) {
+	    (trf->state == TRF7970A_ST_RF_OFF)) {
 		ret = trf7970a_switch_rf_on(trf);
 		if (ret)
 			goto err_unlock;
@@ -1419,7 +1409,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
 	 * has to send an EOF in order to get a response.
 	 */
 	if ((trf->technology == NFC_DIGITAL_RF_TECH_106A) &&
-			(trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) {
+	    (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) {
 		if (req[0] == NFC_T2T_CMD_READ)
 			special_fcn_reg1 = 0;
 		else
@@ -1427,7 +1417,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
 
 		if (special_fcn_reg1 != trf->special_fcn_reg1) {
 			ret = trf7970a_write(trf, TRF7970A_SPECIAL_FCN_REG1,
-					special_fcn_reg1);
+					     special_fcn_reg1);
 			if (ret)
 				return ret;
 
@@ -1447,7 +1437,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
 			iso_ctrl |= TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
 			break;
 		case (ISO15693_REQ_FLAG_SUB_CARRIER |
-				ISO15693_REQ_FLAG_DATA_RATE):
+		      ISO15693_REQ_FLAG_DATA_RATE):
 			iso_ctrl |= TRF7970A_ISO_CTRL_15693_DBL_1OF4_2669;
 			break;
 		}
@@ -1460,23 +1450,18 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
 			trf->iso_ctrl = iso_ctrl;
 		}
 
-		if (trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) {
-			if (trf7970a_is_iso15693_write_or_lock(req[1]) &&
-					(req[0] & ISO15693_REQ_FLAG_OPTION))
-				trf->issue_eof = true;
-			else if ((trf->quirks &
-					TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE) &&
-				 (req[1] == ISO15693_CMD_READ_MULTIPLE_BLOCK))
-				trf->adjust_resp_len = true;
-		}
+		if ((trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) &&
+		    trf7970a_is_iso15693_write_or_lock(req[1]) &&
+		    (req[0] & ISO15693_REQ_FLAG_OPTION))
+			trf->issue_eof = true;
 	}
 
 	return 0;
 }
 
 static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
-		struct sk_buff *skb, u16 timeout,
-		nfc_digital_cmd_complete_t cb, void *arg)
+			     struct sk_buff *skb, u16 timeout,
+			     nfc_digital_cmd_complete_t cb, void *arg)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 	u8 prefix[5];
@@ -1485,7 +1470,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
 	u8 status;
 
 	dev_dbg(trf->dev, "New request - state: %d, timeout: %d ms, len: %d\n",
-			trf->state, timeout, skb->len);
+		trf->state, timeout, skb->len);
 
 	if (skb->len > TRF7970A_TX_MAX)
 		return -EINVAL;
@@ -1493,9 +1478,9 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
 	mutex_lock(&trf->lock);
 
 	if ((trf->state != TRF7970A_ST_IDLE) &&
-			(trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
+	    (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
 		dev_err(trf->dev, "%s - Bogus state: %d\n", __func__,
-				trf->state);
+			trf->state);
 		ret = -EIO;
 		goto out_err;
 	}
@@ -1509,7 +1494,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
 
 	if (timeout) {
 		trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE,
-				GFP_KERNEL);
+						 GFP_KERNEL);
 		if (!trf->rx_skb) {
 			dev_dbg(trf->dev, "Can't alloc rx_skb\n");
 			ret = -ENOMEM;
@@ -1546,14 +1531,14 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
 	 * That totals 5 bytes.
 	 */
 	prefix[0] = TRF7970A_CMD_BIT_CTRL |
-			TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET);
+	    TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET);
 	prefix[1] = TRF7970A_CMD_BIT_CTRL |
-			TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd);
+	    TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd);
 	prefix[2] = TRF7970A_CMD_BIT_CONTINUOUS | TRF7970A_TX_LENGTH_BYTE1;
 
 	if (trf->framing == NFC_DIGITAL_FRAMING_NFCA_SHORT) {
 		prefix[3] = 0x00;
-		prefix[4] = 0x0f; /* 7 bits */
+		prefix[4] = 0x0f;	/* 7 bits */
 	} else {
 		prefix[3] = (len & 0xf00) >> 4;
 		prefix[3] |= ((len & 0xf0) >> 4);
@@ -1587,25 +1572,24 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech)
 	switch (tech) {
 	case NFC_DIGITAL_RF_TECH_106A:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
-			TRF7970A_ISO_CTRL_NFC_CE |
-			TRF7970A_ISO_CTRL_NFC_CE_14443A;
+		    TRF7970A_ISO_CTRL_NFC_CE | TRF7970A_ISO_CTRL_NFC_CE_14443A;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_OOK;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_OOK;
 		break;
 	case NFC_DIGITAL_RF_TECH_212F:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
-			TRF7970A_ISO_CTRL_NFC_NFCF_212;
+		    TRF7970A_ISO_CTRL_NFC_NFCF_212;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_ASK10;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_ASK10;
 		break;
 	case NFC_DIGITAL_RF_TECH_424F:
 		trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
-			TRF7970A_ISO_CTRL_NFC_NFCF_424;
+		    TRF7970A_ISO_CTRL_NFC_NFCF_424;
 		trf->modulator_sys_clk_ctrl =
-			(trf->modulator_sys_clk_ctrl & 0xf8) |
-			TRF7970A_MODULATOR_DEPTH_ASK10;
+		    (trf->modulator_sys_clk_ctrl & 0xf8) |
+		    TRF7970A_MODULATOR_DEPTH_ASK10;
 		break;
 	default:
 		dev_dbg(trf->dev, "Unsupported rf technology: %d\n", tech);
@@ -1622,9 +1606,9 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech)
 	 * here.
 	 */
 	if ((trf->framing == NFC_DIGITAL_FRAMING_NFC_DEP_ACTIVATED) &&
-			(trf->iso_ctrl_tech != trf->iso_ctrl)) {
+	    (trf->iso_ctrl_tech != trf->iso_ctrl)) {
 		ret = trf7970a_write(trf, TRF7970A_ISO_CTRL,
-				trf->iso_ctrl_tech);
+				     trf->iso_ctrl_tech);
 
 		trf->iso_ctrl = trf->iso_ctrl_tech;
 	}
@@ -1679,15 +1663,15 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing)
 		trf->iso_ctrl = iso_ctrl;
 
 		ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-				trf->modulator_sys_clk_ctrl);
+				     trf->modulator_sys_clk_ctrl);
 		if (ret)
 			return ret;
 	}
 
 	if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
 		ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-				trf->chip_status_ctrl |
-					TRF7970A_CHIP_STATUS_RF_ON);
+				     trf->chip_status_ctrl |
+				     TRF7970A_CHIP_STATUS_RF_ON);
 		if (ret)
 			return ret;
 
@@ -1698,7 +1682,7 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing)
 }
 
 static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type,
-		int param)
+				    int param)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 	int ret;
@@ -1710,7 +1694,7 @@ static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type,
 	trf->is_initiator = false;
 
 	if ((trf->state == TRF7970A_ST_PWR_OFF) ||
-			(trf->state == TRF7970A_ST_RF_OFF)) {
+	    (trf->state == TRF7970A_ST_RF_OFF)) {
 		ret = trf7970a_switch_rf_on(trf);
 		if (ret)
 			goto err_unlock;
@@ -1734,7 +1718,8 @@ err_unlock:
 }
 
 static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
-		nfc_digital_cmd_complete_t cb, void *arg, bool mode_detect)
+			       nfc_digital_cmd_complete_t cb, void *arg,
+			       bool mode_detect)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 	int ret;
@@ -1742,9 +1727,9 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
 	mutex_lock(&trf->lock);
 
 	if ((trf->state != TRF7970A_ST_IDLE) &&
-			(trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
+	    (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
 		dev_err(trf->dev, "%s - Bogus state: %d\n", __func__,
-				trf->state);
+			trf->state);
 		ret = -EIO;
 		goto out_err;
 	}
@@ -1757,7 +1742,7 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
 	}
 
 	trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE,
-			GFP_KERNEL);
+					 GFP_KERNEL);
 	if (!trf->rx_skb) {
 		dev_dbg(trf->dev, "Can't alloc rx_skb\n");
 		ret = -ENOMEM;
@@ -1765,25 +1750,25 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
 	}
 
 	ret = trf7970a_write(trf, TRF7970A_RX_SPECIAL_SETTINGS,
-			TRF7970A_RX_SPECIAL_SETTINGS_HBT |
-			TRF7970A_RX_SPECIAL_SETTINGS_M848 |
-			TRF7970A_RX_SPECIAL_SETTINGS_C424 |
-			TRF7970A_RX_SPECIAL_SETTINGS_C212);
+			     TRF7970A_RX_SPECIAL_SETTINGS_HBT |
+			     TRF7970A_RX_SPECIAL_SETTINGS_M848 |
+			     TRF7970A_RX_SPECIAL_SETTINGS_C424 |
+			     TRF7970A_RX_SPECIAL_SETTINGS_C212);
 	if (ret)
 		goto out_err;
 
 	ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL,
-			trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
+			     trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
 	if (ret)
 		goto out_err;
 
 	ret = trf7970a_write(trf, TRF7970A_NFC_LOW_FIELD_LEVEL,
-			TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3));
+			     TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3));
 	if (ret)
 		goto out_err;
 
 	ret = trf7970a_write(trf, TRF7970A_NFC_TARGET_LEVEL,
-			TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7));
+			     TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7));
 	if (ret)
 		goto out_err;
 
@@ -1808,32 +1793,33 @@ out_err:
 }
 
 static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
-		nfc_digital_cmd_complete_t cb, void *arg)
+			      nfc_digital_cmd_complete_t cb, void *arg)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 
 	dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n",
-			trf->state, timeout);
+		trf->state, timeout);
 
 	return _trf7970a_tg_listen(ddev, timeout, cb, arg, false);
 }
 
 static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
-		u16 timeout, nfc_digital_cmd_complete_t cb, void *arg)
+				 u16 timeout, nfc_digital_cmd_complete_t cb,
+				 void *arg)
 {
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 	int ret;
 
 	dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n",
-			trf->state, timeout);
+		trf->state, timeout);
 
 	ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
-			NFC_DIGITAL_RF_TECH_106A);
+				       NFC_DIGITAL_RF_TECH_106A);
 	if (ret)
 		return ret;
 
 	ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
-			NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
+				       NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
 	if (ret)
 		return ret;
 
@@ -1845,7 +1831,7 @@ static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech)
 	struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 
 	dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n",
-			trf->state, trf->md_rf_tech);
+		trf->state, trf->md_rf_tech);
 
 	*rf_tech = trf->md_rf_tech;
 
@@ -1908,14 +1894,13 @@ static int trf7970a_power_up(struct trf7970a *trf)
 
 	usleep_range(5000, 6000);
 
-	if (!(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) {
-		if (gpio_is_valid(trf->en2_gpio)) {
-			gpio_set_value(trf->en2_gpio, 1);
-			usleep_range(1000, 2000);
-		}
+	if (trf->en2_gpiod &&
+	    !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) {
+		gpiod_set_value_cansleep(trf->en2_gpiod, 1);
+		usleep_range(1000, 2000);
 	}
 
-	gpio_set_value(trf->en_gpio, 1);
+	gpiod_set_value_cansleep(trf->en_gpiod, 1);
 
 	usleep_range(20000, 21000);
 
@@ -1935,18 +1920,19 @@ static int trf7970a_power_down(struct trf7970a *trf)
 
 	if (trf->state != TRF7970A_ST_RF_OFF) {
 		dev_dbg(trf->dev, "Can't power down - not RF_OFF state (%d)\n",
-				trf->state);
+			trf->state);
 		return -EBUSY;
 	}
 
-	gpio_set_value(trf->en_gpio, 0);
-	if (gpio_is_valid(trf->en2_gpio))
-		gpio_set_value(trf->en2_gpio, 0);
+	gpiod_set_value_cansleep(trf->en_gpiod, 0);
+
+	if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW))
+		gpiod_set_value_cansleep(trf->en2_gpiod, 0);
 
 	ret = regulator_disable(trf->regulator);
 	if (ret)
 		dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__,
-				ret);
+			ret);
 
 	trf->state = TRF7970A_ST_PWR_OFF;
 
@@ -2003,12 +1989,6 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np)
 	return autosuspend_delay;
 }
 
-static int trf7970a_get_vin_voltage_override(struct device_node *np,
-		u32 *vin_uvolts)
-{
-	return of_property_read_u32(np, "vin-voltage-override", vin_uvolts);
-}
-
 static int trf7970a_probe(struct spi_device *spi)
 {
 	struct device_node *np = spi->dev.of_node;
@@ -2038,53 +2018,48 @@ static int trf7970a_probe(struct spi_device *spi)
 		return ret;
 	}
 
-	if (of_property_read_bool(np, "t5t-rmb-extra-byte-quirk"))
-		trf->quirks |= TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE;
-
 	if (of_property_read_bool(np, "irq-status-read-quirk"))
 		trf->quirks |= TRF7970A_QUIRK_IRQ_STATUS_READ;
 
-	/* There are two enable pins - both must be present */
-	trf->en_gpio = of_get_named_gpio(np, "ti,enable-gpios", 0);
-	if (!gpio_is_valid(trf->en_gpio)) {
+	/* There are two enable pins - only EN must be present in the DT */
+	trf->en_gpiod = devm_gpiod_get_index(trf->dev, "ti,enable", 0,
+					     GPIOD_OUT_LOW);
+	if (IS_ERR(trf->en_gpiod)) {
 		dev_err(trf->dev, "No EN GPIO property\n");
-		return trf->en_gpio;
+		return PTR_ERR(trf->en_gpiod);
 	}
 
-	ret = devm_gpio_request_one(trf->dev, trf->en_gpio,
-			GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN");
-	if (ret) {
-		dev_err(trf->dev, "Can't request EN GPIO: %d\n", ret);
-		return ret;
-	}
-
-	trf->en2_gpio = of_get_named_gpio(np, "ti,enable-gpios", 1);
-	if (!gpio_is_valid(trf->en2_gpio)) {
+	trf->en2_gpiod = devm_gpiod_get_index_optional(trf->dev, "ti,enable", 1,
+						       GPIOD_OUT_LOW);
+	if (!trf->en2_gpiod) {
 		dev_info(trf->dev, "No EN2 GPIO property\n");
-	} else {
-		ret = devm_gpio_request_one(trf->dev, trf->en2_gpio,
-				GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN2");
-		if (ret) {
-			dev_err(trf->dev, "Can't request EN2 GPIO: %d\n", ret);
-			return ret;
-		}
+	} else if (IS_ERR(trf->en2_gpiod)) {
+		dev_err(trf->dev, "Error getting EN2 GPIO property: %ld\n",
+			PTR_ERR(trf->en2_gpiod));
+		return PTR_ERR(trf->en2_gpiod);
+	} else if (of_property_read_bool(np, "en2-rf-quirk")) {
+		trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW;
 	}
 
 	of_property_read_u32(np, "clock-frequency", &clk_freq);
-	if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) ||
-		(clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) {
+	if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) &&
+	    (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) {
 		dev_err(trf->dev,
-			"clock-frequency (%u Hz) unsupported\n",
-			clk_freq);
+			"clock-frequency (%u Hz) unsupported\n", clk_freq);
 		return -EINVAL;
 	}
 
-	if (of_property_read_bool(np, "en2-rf-quirk"))
-		trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW;
+	if (clk_freq == TRF7970A_27MHZ_CLOCK_FREQUENCY) {
+		trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_27MHZ;
+		dev_dbg(trf->dev, "trf7970a configured for 27MHz crystal\n");
+	} else {
+		trf->modulator_sys_clk_ctrl = 0;
+	}
 
 	ret = devm_request_threaded_irq(trf->dev, spi->irq, NULL,
-			trf7970a_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-			"trf7970a", trf);
+					trf7970a_irq,
+					IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+					"trf7970a", trf);
 	if (ret) {
 		dev_err(trf->dev, "Can't request IRQ#%d: %d\n", spi->irq, ret);
 		return ret;
@@ -2106,10 +2081,7 @@ static int trf7970a_probe(struct spi_device *spi)
 		goto err_destroy_lock;
 	}
 
-	ret = trf7970a_get_vin_voltage_override(np, &uvolts);
-	if (ret)
-		uvolts = regulator_get_voltage(trf->regulator);
-
+	uvolts = regulator_get_voltage(trf->regulator);
 	if (uvolts > 4000000)
 		trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
 
@@ -2132,9 +2104,10 @@ static int trf7970a_probe(struct spi_device *spi)
 	}
 
 	trf->ddev = nfc_digital_allocate_device(&trf7970a_nfc_ops,
-			TRF7970A_SUPPORTED_PROTOCOLS,
-			NFC_DIGITAL_DRV_CAPS_IN_CRC |
-				NFC_DIGITAL_DRV_CAPS_TG_CRC, 0, 0);
+						TRF7970A_SUPPORTED_PROTOCOLS,
+						NFC_DIGITAL_DRV_CAPS_IN_CRC |
+						NFC_DIGITAL_DRV_CAPS_TG_CRC, 0,
+						0);
 	if (!trf->ddev) {
 		dev_err(trf->dev, "Can't allocate NFC digital device\n");
 		ret = -ENOMEM;
@@ -2157,7 +2130,7 @@ static int trf7970a_probe(struct spi_device *spi)
 	ret = nfc_digital_register_device(trf->ddev);
 	if (ret) {
 		dev_err(trf->dev, "Can't register NFC digital device: %d\n",
-				ret);
+			ret);
 		goto err_shutdown;
 	}
 
@@ -2266,29 +2239,31 @@ static int trf7970a_pm_runtime_resume(struct device *dev)
 static const struct dev_pm_ops trf7970a_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(trf7970a_suspend, trf7970a_resume)
 	SET_RUNTIME_PM_OPS(trf7970a_pm_runtime_suspend,
-			trf7970a_pm_runtime_resume, NULL)
+			   trf7970a_pm_runtime_resume, NULL)
 };
 
 static const struct of_device_id trf7970a_of_match[] = {
-	{ .compatible = "ti,trf7970a", },
-	{ /* sentinel */ },
+	{.compatible = "ti,trf7970a",},
+	{},
 };
+
 MODULE_DEVICE_TABLE(of, trf7970a_of_match);
 
 static const struct spi_device_id trf7970a_id_table[] = {
-	{ "trf7970a", 0 },
-	{ }
+	{"trf7970a", 0},
+	{}
 };
+
 MODULE_DEVICE_TABLE(spi, trf7970a_id_table);
 
 static struct spi_driver trf7970a_spi_driver = {
 	.probe		= trf7970a_probe,
 	.remove		= trf7970a_remove,
 	.id_table	= trf7970a_id_table,
-	.driver		= {
-		.name	= "trf7970a",
-		.of_match_table = of_match_ptr(trf7970a_of_match),
-		.pm	= &trf7970a_pm_ops,
+	.driver	= {
+		.name		= "trf7970a",
+		.of_match_table	= of_match_ptr(trf7970a_of_match),
+		.pm		= &trf7970a_pm_ops,
 	},
 };
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 951042a375d6..40c7581caeb0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1805,7 +1805,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	if (pci_is_enabled(pdev)) {
 		u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
-		if (dev->ctrl.state == NVME_CTRL_LIVE)
+		if (dev->ctrl.state == NVME_CTRL_LIVE ||
+		    dev->ctrl.state == NVME_CTRL_RESETTING)
 			nvme_start_freeze(&dev->ctrl);
 		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
 			pdev->error_state  != pci_channel_io_normal);
diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c
index e9847ce3860d..570ae3b7adf6 100644
--- a/drivers/s390/net/ctcm_fsms.c
+++ b/drivers/s390/net/ctcm_fsms.c
@@ -217,7 +217,7 @@ void ctcm_purge_skb_queue(struct sk_buff_head *q)
 	CTCM_DBF_TEXT(TRACE, CTC_DBF_DEBUG, __func__);
 
 	while ((skb = skb_dequeue(q))) {
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 	}
 }
@@ -271,7 +271,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg)
 			priv->stats.tx_bytes += 2;
 			first = 0;
 		}
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 	}
 	spin_lock(&ch->collect_lock);
@@ -297,7 +297,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg)
 				skb_put(ch->trans_skb, skb->len), skb->len);
 			priv->stats.tx_packets++;
 			priv->stats.tx_bytes += skb->len - LL_HEADER_LENGTH;
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			i++;
 		}
@@ -1248,7 +1248,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
 			priv->stats.tx_bytes += 2;
 			first = 0;
 		}
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 	}
 	spin_lock(&ch->collect_lock);
@@ -1298,7 +1298,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
 		data_space -= skb->len;
 		priv->stats.tx_packets++;
 		priv->stats.tx_bytes += skb->len;
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 		peekskb = skb_peek(&ch->collect_queue);
 		if (peekskb->len > data_space)
@@ -1795,7 +1795,7 @@ static void ctcmpc_chx_send_sweep(fsm_instance *fsm, int event, void *arg)
 		fsm_event(grp->fsm, MPCG_EVENT_INOP, dev);
 				goto done;
 	} else {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		skb_queue_tail(&wch->io_queue, skb);
 	}
 
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 99121352c57b..e8782a8619f7 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -483,7 +483,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			spin_unlock_irqrestore(&ch->collect_lock, saveflags);
 			return -EBUSY;
 		} else {
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 			header.length = l;
 			header.type = be16_to_cpu(skb->protocol);
 			header.unused = 0;
@@ -500,7 +500,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 	 * Protect skb against beeing free'd by upper
 	 * layers.
 	 */
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	ch->prof.txlen += skb->len;
 	header.length = skb->len + LL_HEADER_LENGTH;
 	header.type = be16_to_cpu(skb->protocol);
@@ -517,14 +517,14 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 	if (hi) {
 		nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
 		if (!nskb) {
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			skb_pull(skb, LL_HEADER_LENGTH + 2);
 			ctcm_clear_busy(ch->netdev);
 			return -ENOMEM;
 		} else {
 			skb_put_data(nskb, skb->data, skb->len);
-			atomic_inc(&nskb->users);
-			atomic_dec(&skb->users);
+			refcount_inc(&nskb->users);
+			refcount_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			skb = nskb;
 		}
@@ -542,7 +542,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			 * Remove our header. It gets added
 			 * again on retransmit.
 			 */
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			skb_pull(skb, LL_HEADER_LENGTH + 2);
 			ctcm_clear_busy(ch->netdev);
 			return -ENOMEM;
@@ -553,7 +553,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
 		ch->ccw[1].count = skb->len;
 		skb_copy_from_linear_data(skb,
 				skb_put(ch->trans_skb, skb->len), skb->len);
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 		ccw_idx = 0;
 	} else {
@@ -679,7 +679,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 
 	if ((fsm_getstate(ch->fsm) != CTC_STATE_TXIDLE) || grp->in_sweep) {
 		spin_lock_irqsave(&ch->collect_lock, saveflags);
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		p_header = kmalloc(PDU_HEADER_LENGTH, gfp_type());
 
 		if (!p_header) {
@@ -716,7 +716,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 	 * Protect skb against beeing free'd by upper
 	 * layers.
 	 */
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 
 	/*
 	 * IDAL support in CTCM is broken, so we have to
@@ -729,8 +729,8 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 			goto nomem_exit;
 		} else {
 			skb_put_data(nskb, skb->data, skb->len);
-			atomic_inc(&nskb->users);
-			atomic_dec(&skb->users);
+			refcount_inc(&nskb->users);
+			refcount_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			skb = nskb;
 		}
@@ -810,7 +810,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
 		ch->trans_skb->len = 0;
 		ch->ccw[1].count = skb->len;
 		skb_put_data(ch->trans_skb, skb->data, skb->len);
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_irq(skb);
 		ccw_idx = 0;
 		CTCM_PR_DBGDATA("%s(%s): trans_skb len: %04x\n"
@@ -855,7 +855,7 @@ nomem_exit:
 			"%s(%s): MEMORY allocation ERROR\n",
 			CTCM_FUNTAIL, ch->id);
 	rc = -ENOMEM;
-	atomic_dec(&skb->users);
+	refcount_dec(&skb->users);
 	dev_kfree_skb_any(skb);
 	fsm_event(priv->mpcg->fsm, MPCG_EVENT_INOP, dev);
 done:
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 7db427c0a6a4..1579695f4e64 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -743,7 +743,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 	conn->prof.tx_pending--;
 	if (single_flag) {
 		if ((skb = skb_dequeue(&conn->commit_queue))) {
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			if (privptr) {
 				privptr->stats.tx_packets++;
 				privptr->stats.tx_bytes +=
@@ -766,7 +766,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 		txbytes += skb->len;
 		txpackets++;
 		stat_maxcq++;
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 	}
 	if (conn->collect_len > conn->prof.maxmulti)
@@ -958,7 +958,7 @@ static void netiucv_purge_skb_queue(struct sk_buff_head *q)
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(q))) {
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 	}
 }
@@ -1176,7 +1176,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 			IUCV_DBF_TEXT(data, 2,
 				      "EBUSY from netiucv_transmit_skb\n");
 		} else {
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 			skb_queue_tail(&conn->collect_queue, skb);
 			conn->collect_len += l;
 			rc = 0;
@@ -1245,7 +1245,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 		} else {
 			if (copied)
 				dev_kfree_skb(skb);
-			atomic_inc(&nskb->users);
+			refcount_inc(&nskb->users);
 			skb_queue_tail(&conn->commit_queue, nskb);
 		}
 	}
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3b657d5b7e49..aec06e10b969 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1242,7 +1242,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
 				iucv->sk_txnotify(skb, TX_NOTIFY_GENERALERROR);
 			}
 		}
-		atomic_dec(&skb->users);
+		refcount_dec(&skb->users);
 		dev_kfree_skb_any(skb);
 		skb = skb_dequeue(&buf->skb_list);
 	}
@@ -3975,7 +3975,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
 	int flush_cnt = 0, hdr_len, large_send = 0;
 
 	buffer = buf->buffer;
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	skb_queue_tail(&buf->skb_list, skb);
 
 	/*check first on TSO ....*/
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 2ee92aa90fe9..e937490d5d97 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
 		QEDI_ERR(&qedi->dbg_ctx,
 			 "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
 			 protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
-		WARN_ON(1);
 	}
 }
 
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index f46880315ba8..5f5a4ef2e529 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1499,11 +1499,9 @@ err_idx:
 
 void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
 {
-	if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+	if (!test_and_clear_bit(idx, qedi->task_idx_map))
 		QEDI_ERR(&qedi->dbg_ctx,
 			 "FW task context, already cleared, tid=0x%x\n", idx);
-		WARN_ON(1);
-	}
 }
 
 void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 0d8f81591bed..3fdca2cdd8da 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
 	 */
 	if (dump_payload)
 		goto after_immediate_data;
+	/*
+	 * Check for underflow case where both EDTL and immediate data payload
+	 * exceeds what is presented by CDB's TRANSFER LENGTH, and what has
+	 * already been set in target_cmd_size_check() as se_cmd->data_length.
+	 *
+	 * For this special case, fail the command and dump the immediate data
+	 * payload.
+	 */
+	if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+		cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+		goto after_immediate_data;
+	}
 
 	immed_ret = iscsit_handle_immediate_data(cmd, hdr,
 					cmd->first_burst_len);
@@ -4423,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession(
 	 * always sleep waiting for RX/TX thread shutdown to complete
 	 * within iscsit_close_connection().
 	 */
-	if (!conn->conn_transport->rdma_shutdown)
+	if (!conn->conn_transport->rdma_shutdown) {
 		sleep = cmpxchg(&conn->tx_thread_active, true, false);
+		if (!sleep)
+			return;
+	}
 
 	atomic_set(&conn->conn_logout_remove, 0);
 	complete(&conn->conn_logout_comp);
@@ -4440,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid(
 {
 	int sleep = 1;
 
-	if (!conn->conn_transport->rdma_shutdown)
+	if (!conn->conn_transport->rdma_shutdown) {
 		sleep = cmpxchg(&conn->tx_thread_active, true, false);
+		if (!sleep)
+			return;
+	}
 
 	atomic_set(&conn->conn_logout_remove, 0);
 	complete(&conn->conn_logout_comp);
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 9ab7090f7c83..0912de7c0cf8 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -136,7 +136,7 @@ int	init_se_kmem_caches(void);
 void	release_se_kmem_caches(void);
 u32	scsi_get_new_index(scsi_index_t);
 void	transport_subsystem_check_init(void);
-void	transport_cmd_finish_abort(struct se_cmd *, int);
+int	transport_cmd_finish_abort(struct se_cmd *, int);
 unsigned char *transport_dump_cmd_direction(struct se_cmd *);
 void	transport_dump_dev_state(struct se_device *, char *, int *);
 void	transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index dce1e1b47316..13f47bf4d16b 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
 	kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
 	unsigned long flags;
 	bool remove = true, send_tas;
@@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 		transport_send_task_abort(cmd);
 	}
 
-	transport_cmd_finish_abort(cmd, remove);
+	return transport_cmd_finish_abort(cmd, remove);
 }
 
 static int target_check_cdb_and_preempt(struct list_head *list,
@@ -184,8 +184,8 @@ void core_tmr_abort_task(
 		cancel_work_sync(&se_cmd->work);
 		transport_wait_for_tasks(se_cmd);
 
-		transport_cmd_finish_abort(se_cmd, true);
-		target_put_sess_cmd(se_cmd);
+		if (!transport_cmd_finish_abort(se_cmd, true))
+			target_put_sess_cmd(se_cmd);
 
 		printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
 				" ref_tag: %llu\n", ref_tag);
@@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list(
 		cancel_work_sync(&cmd->work);
 		transport_wait_for_tasks(cmd);
 
-		transport_cmd_finish_abort(cmd, 1);
-		target_put_sess_cmd(cmd);
+		if (!transport_cmd_finish_abort(cmd, 1))
+			target_put_sess_cmd(cmd);
 	}
 }
 
@@ -380,8 +380,8 @@ static void core_tmr_drain_state_list(
 		cancel_work_sync(&cmd->work);
 		transport_wait_for_tasks(cmd);
 
-		core_tmr_handle_tas_abort(cmd, tas);
-		target_put_sess_cmd(cmd);
+		if (!core_tmr_handle_tas_abort(cmd, tas))
+			target_put_sess_cmd(cmd);
 	}
 }
 
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 6025935036c9..f1b3a46bdcaf 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
 		percpu_ref_put(&lun->lun_ref);
 }
 
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
 	bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+	int ret = 0;
 
 	if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
 		transport_lun_remove_cmd(cmd);
@@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 		cmd->se_tfo->aborted_task(cmd);
 
 	if (transport_cmd_check_stop_to_fabric(cmd))
-		return;
+		return 1;
 	if (remove && ack_kref)
-		transport_put_cmd(cmd);
+		ret = transport_put_cmd(cmd);
+
+	return ret;
 }
 
 static void target_complete_failure_work(struct work_struct *work)
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 734cbf8d9676..dd9f1bebb5a3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
 	int status;
 
 	token = (autofs_wqt_t) param->fail.token;
-	status = param->fail.status ? param->fail.status : -ENOENT;
+	status = param->fail.status < 0 ? param->fail.status : -ENOENT;
 	return autofs4_wait_release(sbi, token, status);
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599dddd36..0a7404ef9335 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -263,7 +263,10 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 		kfree(vecs);
 
 	if (unlikely(bio.bi_error))
-		return bio.bi_error;
+		ret = bio.bi_error;
+
+	bio_uninit(&bio);
+
 	return ret;
 }
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0fd081bd2a2f..fcef70602b27 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (!is_sync_kiocb(iocb))
 		ctx->iocb = iocb;
 
-	if (to->type & ITER_IOVEC)
+	if (to->type == ITER_IOVEC)
 		ctx->should_dirty = true;
 
 	rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b08531977daa..3b147dc6af63 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 
 	if (!pages) {
 		pages = vmalloc(max_pages * sizeof(struct page *));
-		if (!bv) {
+		if (!pages) {
 			kvfree(bv);
 			return -ENOMEM;
 		}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 27bc360c7ffd..a723df3e0197 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 		     struct cifs_fid *fid, __u16 search_flags,
 		     struct cifs_search_info *srch_inf)
 {
-	return CIFSFindFirst(xid, tcon, path, cifs_sb,
-			     &fid->netfid, search_flags, srch_inf, true);
+	int rc;
+
+	rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+			   &fid->netfid, search_flags, srch_inf, true);
+	if (rc)
+		cifs_dbg(FYI, "find first failed=%d\n", rc);
+	return rc;
 }
 
 static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c58691834eb2..7e48561abd29 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
 	kfree(utf16_path);
 	if (rc) {
-		cifs_dbg(VFS, "open dir failed\n");
+		cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
 		return rc;
 	}
 
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
 				  fid->volatile_fid, 0, srch_inf);
 	if (rc) {
-		cifs_dbg(VFS, "query directory failed\n");
+		cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
 		SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 	}
 	return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 
 	sg = init_sg(rqst, sign);
 	if (!sg) {
-		cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+		cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+		rc = -ENOMEM;
 		goto free_req;
 	}
 
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 	iv = kzalloc(iv_len, GFP_KERNEL);
 	if (!iv) {
 		cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+		rc = -ENOMEM;
 		goto free_sg;
 	}
 	iv[0] = 3;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3cb5c9e2d4e7..de50e749ff05 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
 	pcreatetime = (__u64 *)value;
 	*pcreatetime = CIFS_I(inode)->createtime;
 	return sizeof(__u64);
-
-	return rc;
 }
 
 
diff --git a/fs/dax.c b/fs/dax.c
index 2a6889b3585f..9187f3b07f3e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 			if (ret < 0)
 				goto out;
 		}
+		start_index = indices[pvec.nr - 1] + 1;
 	}
 out:
 	put_dax(dax_dev);
diff --git a/fs/exec.c b/fs/exec.c
index 72934df68471..904199086490 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
 	if (write) {
 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+		unsigned long ptr_size;
 		struct rlimit *rlim;
 
+		/*
+		 * Since the stack will hold pointers to the strings, we
+		 * must account for them as well.
+		 *
+		 * The size calculation is the entire vma while each arg page is
+		 * built, so each time we get here it's calculating how far it
+		 * is currently (rather than each call being just the newly
+		 * added size from the arg page).  As a result, we need to
+		 * always add the entire size of the pointers, so that on the
+		 * last call to get_arg_page() we'll actually have the entire
+		 * correct size.
+		 */
+		ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+		if (ptr_size > ULONG_MAX - size)
+			goto fail;
+		size += ptr_size;
+
 		acct_arg_size(bprm, size / PAGE_SIZE);
 
 		/*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *    to work from.
 		 */
 		rlim = current->signal->rlim;
-		if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-			put_page(page);
-			return NULL;
-		}
+		if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+			goto fail;
 	}
 
 	return page;
+
+fail:
+	put_page(page);
+	return NULL;
 }
 
 static void put_arg_page(struct page *page)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e08d73..390ac9c39c59 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
 	 * A single slot, so highest used slotid is either 0 or -1
 	 */
 	nfs4_free_slot(tbl, slot);
-	nfs4_slot_tbl_drain_complete(tbl);
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd7754f8a..2ac00bf4ecf1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(nfs_link);
 
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
-	struct dentry *old_dentry = data->old_dentry;
-	struct dentry *new_dentry = data->new_dentry;
-	struct inode *old_inode = d_inode(old_dentry);
-	struct inode *new_inode = d_inode(new_dentry);
-
-	nfs_mark_for_revalidate(old_inode);
-
-	switch (task->tk_status) {
-	case 0:
-		if (new_inode != NULL)
-			nfs_drop_nlink(new_inode);
-		d_move(old_dentry, new_dentry);
-		nfs_set_verifier(new_dentry,
-					nfs_save_change_attribute(data->new_dir));
-		break;
-	case -ENOENT:
-		nfs_dentry_handle_enoent(old_dentry);
-	}
-}
-
 /*
  * RENAME
  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct inode *old_inode = d_inode(old_dentry);
 	struct inode *new_inode = d_inode(new_dentry);
-	struct dentry *dentry = NULL;
+	struct dentry *dentry = NULL, *rehash = NULL;
 	struct rpc_task *task;
 	int error = -EBUSY;
 
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * To prevent any new references to the target during the
 		 * rename, we unhash the dentry in advance.
 		 */
-		if (!d_unhashed(new_dentry))
+		if (!d_unhashed(new_dentry)) {
 			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
 
 		if (d_count(new_dentry) > 2) {
 			int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				goto out;
 
 			new_dentry = dentry;
+			rehash = NULL;
 			new_inode = NULL;
 		}
 	}
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (new_inode != NULL)
 		NFS_PROTO(new_inode)->return_delegation(new_inode);
 
-	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
-					nfs_complete_rename);
+	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
 	if (IS_ERR(task)) {
 		error = PTR_ERR(task);
 		goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error == 0)
 		error = task->tk_status;
 	rpc_put_task(task);
+	nfs_mark_for_revalidate(old_inode);
 out:
+	if (rehash)
+		d_rehash(rehash);
 	trace_nfs_rename_exit(old_dir, old_dentry,
 			new_dir, new_dentry, error);
+	if (!error) {
+		if (new_inode != NULL)
+			nfs_drop_nlink(new_inode);
+		/*
+		 * The d_move() should be here instead of in an async RPC completion
+		 * handler because we need the proper locks to move the dentry.  If
+		 * we're interrupted by a signal, the async RPC completion handler
+		 * should mark the directories for revalidation.
+		 */
+		d_move(old_dentry, new_dentry);
+		nfs_set_verifier(new_dentry,
+					nfs_save_change_attribute(new_dir));
+	} else if (error == -ENOENT)
+		nfs_dentry_handle_enoent(old_dentry);
+
 	/* new dentry created? */
 	if (dentry)
 		dput(dentry);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a3b8cd..dbfa18900e25 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
 
 	/* Except MODE, it seems harmless of setting twice. */
 	if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
-		attrset[1] & FATTR4_WORD1_MODE)
+		(attrset[1] & FATTR4_WORD1_MODE ||
+		 attrset[2] & FATTR4_WORD2_MODE_UMASK))
 		sattr->ia_valid &= ~ATTR_MODE;
 
 	if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
 	size_t max_pages = max_response_pages(server);
 
 	dprintk("--> %s\n", __func__);
+	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	nfs4_free_pages(lgp->args.layout.pages, max_pages);
 	pnfs_put_layout_hdr(NFS_I(inode)->layout);
 	put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
 	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
 	if (status == 0 && lgp->res.layoutp->len)
 		lseg = pnfs_layout_process(lgp);
-	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de036501b..cbf82b0d4467 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2134,6 +2134,8 @@ again:
 	put_rpccred(cred);
 	switch (status) {
 	case 0:
+	case -EINTR:
+	case -ERESTARTSYS:
 		break;
 	case -ETIMEDOUT:
 		if (clnt->cl_softrtry)
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3b7c937a36b5..4689940a953c 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
 	struct ocfs2_lock_res *lockres;
 
 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
+	/* had_lock means that the currect process already takes the cluster
+	 * lock previously. If had_lock is 1, we have nothing to do here, and
+	 * it will get unlocked where we got the lock.
+	 */
 	if (!had_lock) {
 		ocfs2_remove_holder(lockres, oh);
 		ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3c5384d9b3a5..f70c3778d600 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
 			   void *buffer,
 			   size_t buffer_size)
 {
-	int ret;
+	int ret, had_lock;
 	struct buffer_head *di_bh = NULL;
+	struct ocfs2_lock_holder oh;
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 0);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
+	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+	if (had_lock < 0) {
+		mlog_errno(had_lock);
+		return had_lock;
 	}
 	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
 				     name, buffer, buffer_size);
 	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
-	ocfs2_inode_unlock(inode, 0);
+	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
 
 	brelse(di_bh);
 
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
 {
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	int ret, credits, ref_meta = 0, ref_credits = 0;
+	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
 	struct ocfs2_refcount_tree *ref_tree = NULL;
+	struct ocfs2_lock_holder oh;
 
 	struct ocfs2_xattr_info xi = {
 		.xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
 		return -ENOMEM;
 	}
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 1);
-	if (ret < 0) {
+	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+	if (had_lock < 0) {
+		ret = had_lock;
 		mlog_errno(ret);
 		goto cleanup_nolock;
 	}
@@ -3670,7 +3673,7 @@ cleanup:
 		if (ret)
 			mlog_errno(ret);
 	}
-	ocfs2_inode_unlock(inode, 1);
+	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
 cleanup_nolock:
 	brelse(di_bh);
 	brelse(xbs.xattr_bh);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e..3b91faacc1ba 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1316,9 +1316,12 @@ xfs_vm_bmap(
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
 	 * bypasseѕ the file system for actual I/O.  We really can't allow
 	 * that on reflinks inodes, so we have to skip out here.  And yes,
-	 * 0 is the magic code for a bmap error..
+	 * 0 is the magic code for a bmap error.
+	 *
+	 * Since we don't pass back blockdev info, we can't return bmap
+	 * information for rt files either.
 	 */
-	if (xfs_is_reflink_inode(ip))
+	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
 		return 0;
 
 	filemap_write_and_wait(mapping);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 197f3fffc9a7..408c7820e200 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -210,7 +210,8 @@ struct acpi_device_flags {
 	u32 of_compatible_ok:1;
 	u32 coherent_dma:1;
 	u32 cca_seen:1;
-	u32 reserved:20;
+	u32 spi_i2c_slave:1;
+	u32 reserved:19;
 };
 
 /* File System */
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index c1da539f5e28..4d97a89da066 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize)
 
 static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size)
 {
-	return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) <
+	return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) <
 	       sk_atm(vcc)->sk_sndbuf;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0e99cf..a7e29fa0981f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -426,6 +426,7 @@ extern void bio_advance(struct bio *, unsigned);
 
 extern void bio_init(struct bio *bio, struct bio_vec *table,
 		     unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b74a3edcb3da..1ddd36bd2173 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,6 +391,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c970a25d2a49..360c082e885c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -7,6 +7,7 @@
 struct sock;
 struct cgroup;
 struct sk_buff;
+struct bpf_sock_ops_kern;
 
 #ifdef CONFIG_CGROUP_BPF
 
@@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 int __cgroup_bpf_run_filter_sk(struct sock *sk,
 			       enum bpf_attach_type type);
 
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+				     struct bpf_sock_ops_kern *sock_ops,
+				     enum bpf_attach_type type);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled && (sock_ops)->sk) {	       \
+		typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);	       \
+		if (sk_fullsock(__sk))					       \
+			__ret = __cgroup_bpf_run_filter_sock_ops(__sk,	       \
+								 sock_ops,     \
+							 BPF_CGROUP_SOCK_OPS); \
+	}								       \
+	__ret;								       \
+})
 #else
 
 struct cgroup_bpf {};
@@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 
 #endif /* CONFIG_CGROUP_BPF */
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index deca4e7f2845..b69e7a5869ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
 	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 };
 
 struct bpf_map {
@@ -155,9 +156,14 @@ struct bpf_prog;
 struct bpf_insn_access_aux {
 	enum bpf_reg_type reg_type;
 	int ctx_field_size;
-	int converted_op_size;
 };
 
+static inline void
+bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
+{
+	aux->ctx_field_size = size;
+}
+
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -172,7 +178,7 @@ struct bpf_verifier_ops {
 	u32 (*convert_ctx_access)(enum bpf_access_type type,
 				  const struct bpf_insn *src,
 				  struct bpf_insn *dst,
-				  struct bpf_prog *prog);
+				  struct bpf_prog *prog, u32 *target_size);
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
 };
@@ -288,9 +294,11 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 void bpf_fd_array_map_clear(struct bpf_map *map);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 03bf223f18be..3d137c33d664 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
 #endif
 #ifdef CONFIG_BPF_EVENTS
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1fa26dc562ce..f1fc9baa3509 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -337,6 +337,22 @@ struct bpf_prog_aux;
 	bpf_size;						\
 })
 
+#define bpf_size_to_bytes(bpf_size)				\
+({								\
+	int bytes = -EINVAL;					\
+								\
+	if (bpf_size == BPF_B)					\
+		bytes = sizeof(u8);				\
+	else if (bpf_size == BPF_H)				\
+		bytes = sizeof(u16);				\
+	else if (bpf_size == BPF_W)				\
+		bytes = sizeof(u32);				\
+	else if (bpf_size == BPF_DW)				\
+		bytes = sizeof(u64);				\
+								\
+	bytes;							\
+})
+
 #define BPF_SIZEOF(type)					\
 	({							\
 		const int __size = bytes_to_bpf_size(sizeof(type)); \
@@ -351,6 +367,13 @@ struct bpf_prog_aux;
 		__size;						\
 	})
 
+#define BPF_LDST_BYTES(insn)					\
+	({							\
+		const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \
+		WARN_ON(__size < 0);				\
+		__size;						\
+	})
+
 #define __BPF_MAP_0(m, v, ...) v
 #define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
 #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
@@ -401,6 +424,18 @@ struct bpf_prog_aux;
 #define BPF_CALL_4(name, ...)	BPF_CALL_x(4, name, __VA_ARGS__)
 #define BPF_CALL_5(name, ...)	BPF_CALL_x(5, name, __VA_ARGS__)
 
+#define bpf_ctx_range(TYPE, MEMBER)						\
+	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)				\
+	offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+
+#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)				\
+	({									\
+		BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE));		\
+		*(PTR_SIZE) = (SIZE);						\
+		offsetof(TYPE, MEMBER);						\
+	})
+
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
@@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 	return prog->type == BPF_PROG_TYPE_UNSPEC;
 }
 
+static inline bool
+bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default)
+{
+	bool off_ok;
+#ifdef __LITTLE_ENDIAN
+	off_ok = (off & (size_default - 1)) == 0;
+#else
+	off_ok = (off & (size_default - 1)) + size == size_default;
+#endif
+	return off_ok && size <= size_default && (size & (size - 1)) == 0;
+}
+
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
@@ -898,4 +945,13 @@ static inline int bpf_tell_extensions(void)
 	return SKF_AD_MAX;
 }
 
+struct bpf_sock_ops_kern {
+	struct	sock *sk;
+	u32	op;
+	union {
+		u32 reply;
+		u32 replylong[4];
+	};
+};
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 12f6fba6d21a..97caf1821de8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/timer.h>
 #include <linux/in.h>
+#include <linux/refcount.h>
 #include <uapi/linux/igmp.h>
 
 static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
@@ -84,7 +85,7 @@ struct ip_mc_list {
 	struct ip_mc_list __rcu *next_hash;
 	struct timer_list	timer;
 	int			users;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	spinlock_t		lock;
 	char			tm_running;
 	char			reporter;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index e7c04c4e4bcd..fb3f809e34e4 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -11,6 +11,7 @@
 #include <linux/timer.h>
 #include <linux/sysctl.h>
 #include <linux/rtnetlink.h>
+#include <linux/refcount.h>
 
 struct ipv4_devconf {
 	void	*sysctl;
@@ -22,7 +23,7 @@ struct ipv4_devconf {
 
 struct in_device {
 	struct net_device	*dev;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	int			dead;
 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/
 
@@ -219,7 +220,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev)
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev)
-		atomic_inc(&in_dev->refcnt);
+		refcount_inc(&in_dev->refcnt);
 	rcu_read_unlock();
 	return in_dev;
 }
@@ -240,12 +241,12 @@ void in_dev_finish_destroy(struct in_device *idev);
 
 static inline void in_dev_put(struct in_device *idev)
 {
-	if (atomic_dec_and_test(&idev->refcnt))
+	if (refcount_dec_and_test(&idev->refcnt))
 		in_dev_finish_destroy(idev);
 }
 
-#define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
-#define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
+#define __in_dev_put(idev)  refcount_dec(&(idev)->refcnt)
+#define in_dev_hold(idev)   refcount_inc(&(idev)->refcnt)
 
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 556e1c31b5d0..f31a0b5377e1 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1103,6 +1103,9 @@ enum mlx5_mcam_feature_groups {
 #define MLX5_CAP_FPGA(mdev, cap) \
 	MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
 
+#define MLX5_CAP64_FPGA(mdev, cap) \
+	MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 750701b3b863..df6ce59a1f95 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,6 +44,7 @@
 #include <linux/workqueue.h>
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
+#include <linux/idr.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
@@ -110,6 +111,7 @@ enum {
 	MLX5_REG_DCBX_APP        = 0x4021,
 	MLX5_REG_FPGA_CAP	 = 0x4022,
 	MLX5_REG_FPGA_CTRL	 = 0x4023,
+	MLX5_REG_FPGA_ACCESS_REG = 0x4024,
 	MLX5_REG_PCAP		 = 0x5001,
 	MLX5_REG_PMTU		 = 0x5003,
 	MLX5_REG_PTYS		 = 0x5004,
@@ -737,6 +739,14 @@ struct mlx5e_resources {
 	struct mlx5_sq_bfreg       bfreg;
 };
 
+#define MLX5_MAX_RESERVED_GIDS 8
+
+struct mlx5_rsvd_gids {
+	unsigned int start;
+	unsigned int count;
+	struct ida ida;
+};
+
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
 	/* sync pci state */
@@ -766,6 +776,10 @@ struct mlx5_core_dev {
 	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
+	struct {
+		struct mlx5_rsvd_gids	reserved_gids;
+		atomic_t                roce_en;
+	} roce;
 #ifdef CONFIG_MLX5_FPGA
 	struct mlx5_fpga_device *fpga;
 #endif
@@ -932,6 +946,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
@@ -1045,6 +1060,11 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
 		     bool map_wc, bool fast_path);
 void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
+			   const u8 *mac, bool vlan, u16 vlan_id);
+
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
 	return ioread32be(&dev->iseg->initializing) >> 31;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d6b99d5d0f24..87869c04849a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -232,6 +232,11 @@ enum {
 	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
+	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
+	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
+	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
+	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
 	MLX5_CMD_OP_MAX
 };
 
@@ -600,7 +605,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_at_20[0x20];
+	u8         swp[0x1];
+	u8         swp_csum[0x1];
+	u8         swp_lso[0x1];
+	u8         reserved_at_23[0x1d];
 
 	u8         reserved_at_40[0x10];
 	u8         lro_min_mss_size[0x10];
@@ -2433,7 +2441,8 @@ struct mlx5_ifc_sqc_bits {
 	u8	   min_wqe_inline_mode[0x3];
 	u8         state[0x4];
 	u8         reg_umr[0x1];
-	u8         reserved_at_d[0x13];
+	u8         allow_swp[0x1];
+	u8         reserved_at_e[0x12];
 
 	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
@@ -8304,6 +8313,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
 	struct mlx5_ifc_mtpps_reg_bits mtpps_reg;
 	struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
+	struct mlx5_ifc_fpga_access_reg_bits fpga_access_reg;
 	struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits;
 	struct mlx5_ifc_fpga_cap_bits fpga_cap_bits;
 	struct mlx5_ifc_mcqi_reg_bits mcqi_reg;
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 0032d10ac6cf..255a88d08078 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,6 +32,14 @@
 #ifndef MLX5_IFC_FPGA_H
 #define MLX5_IFC_FPGA_H
 
+enum {
+	MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
+};
+
+enum {
+	MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC    = 0x2,
+};
+
 struct mlx5_ifc_fpga_shell_caps_bits {
 	u8         max_num_qps[0x10];
 	u8         reserved_at_10[0x8];
@@ -108,6 +116,15 @@ struct mlx5_ifc_fpga_cap_bits {
 	u8         reserved_at_500[0x300];
 };
 
+enum {
+	MLX5_FPGA_CTRL_OPERATION_LOAD                = 0x1,
+	MLX5_FPGA_CTRL_OPERATION_RESET               = 0x2,
+	MLX5_FPGA_CTRL_OPERATION_FLASH_SELECT        = 0x3,
+	MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON   = 0x4,
+	MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF  = 0x5,
+	MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX       = 0x6,
+};
+
 struct mlx5_ifc_fpga_ctrl_bits {
 	u8         reserved_at_0[0x8];
 	u8         operation[0x8];
@@ -141,4 +158,275 @@ struct mlx5_ifc_fpga_error_event_bits {
 	u8         reserved_at_60[0x80];
 };
 
+#define MLX5_FPGA_ACCESS_REG_SIZE_MAX 64
+
+struct mlx5_ifc_fpga_access_reg_bits {
+	u8         reserved_at_0[0x20];
+
+	u8         reserved_at_20[0x10];
+	u8         size[0x10];
+
+	u8         address[0x40];
+
+	u8         data[0][0x8];
+};
+
+enum mlx5_ifc_fpga_qp_state {
+	MLX5_FPGA_QPC_STATE_INIT    = 0x0,
+	MLX5_FPGA_QPC_STATE_ACTIVE  = 0x1,
+	MLX5_FPGA_QPC_STATE_ERROR   = 0x2,
+};
+
+enum mlx5_ifc_fpga_qp_type {
+	MLX5_FPGA_QPC_QP_TYPE_SHELL_QP    = 0x0,
+	MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP  = 0x1,
+};
+
+enum mlx5_ifc_fpga_qp_service_type {
+	MLX5_FPGA_QPC_ST_RC  = 0x0,
+};
+
+struct mlx5_ifc_fpga_qpc_bits {
+	u8         state[0x4];
+	u8         reserved_at_4[0x1b];
+	u8         qp_type[0x1];
+
+	u8         reserved_at_20[0x4];
+	u8         st[0x4];
+	u8         reserved_at_28[0x10];
+	u8         traffic_class[0x8];
+
+	u8         ether_type[0x10];
+	u8         prio[0x3];
+	u8         dei[0x1];
+	u8         vid[0xc];
+
+	u8         reserved_at_60[0x20];
+
+	u8         reserved_at_80[0x8];
+	u8         next_rcv_psn[0x18];
+
+	u8         reserved_at_a0[0x8];
+	u8         next_send_psn[0x18];
+
+	u8         reserved_at_c0[0x10];
+	u8         pkey[0x10];
+
+	u8         reserved_at_e0[0x8];
+	u8         remote_qpn[0x18];
+
+	u8         reserved_at_100[0x15];
+	u8         rnr_retry[0x3];
+	u8         reserved_at_118[0x5];
+	u8         retry_count[0x3];
+
+	u8         reserved_at_120[0x20];
+
+	u8         reserved_at_140[0x10];
+	u8         remote_mac_47_32[0x10];
+
+	u8         remote_mac_31_0[0x20];
+
+	u8         remote_ip[16][0x8];
+
+	u8         reserved_at_200[0x40];
+
+	u8         reserved_at_240[0x10];
+	u8         fpga_mac_47_32[0x10];
+
+	u8         fpga_mac_31_0[0x20];
+
+	u8         fpga_ip[16][0x8];
+};
+
+struct mlx5_ifc_fpga_create_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_create_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         field_select[0x20];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_fpga_query_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         clear[0x1];
+	u8         reserved_at_41[0x7];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	u8         rx_ack_packets[0x40];
+
+	u8         rx_send_packets[0x40];
+
+	u8         tx_ack_packets[0x40];
+
+	u8         tx_send_packets[0x40];
+
+	u8         rx_total_drop[0x40];
+
+	u8         reserved_at_1c0[0x1c0];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x8];
+	u8         fpga_qpn[0x18];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ipsec_extended_cap_bits {
+	u8         encapsulation[0x20];
+
+	u8         reserved_0[0x15];
+	u8         ipv4_fragment[0x1];
+	u8         ipv6[0x1];
+	u8         esn[0x1];
+	u8         lso[0x1];
+	u8         transport_and_tunnel_mode[0x1];
+	u8         tunnel_mode[0x1];
+	u8         transport_mode[0x1];
+	u8         ah_esp[0x1];
+	u8         esp[0x1];
+	u8         ah[0x1];
+	u8         ipv4_options[0x1];
+
+	u8         auth_alg[0x20];
+
+	u8         enc_alg[0x20];
+
+	u8         sa_cap[0x20];
+
+	u8         reserved_1[0x10];
+	u8         number_of_ipsec_counters[0x10];
+
+	u8         ipsec_counters_addr_low[0x20];
+	u8         ipsec_counters_addr_high[0x20];
+};
+
+struct mlx5_ifc_ipsec_counters_bits {
+	u8         dec_in_packets[0x40];
+
+	u8         dec_out_packets[0x40];
+
+	u8         dec_bypass_packets[0x40];
+
+	u8         enc_in_packets[0x40];
+
+	u8         enc_out_packets[0x40];
+
+	u8         enc_bypass_packets[0x40];
+
+	u8         drop_dec_packets[0x40];
+
+	u8         failed_auth_dec_packets[0x40];
+
+	u8         drop_enc_packets[0x40];
+
+	u8         success_add_sa[0x40];
+
+	u8         fail_add_sa[0x40];
+
+	u8         success_delete_sa[0x40];
+
+	u8         fail_delete_sa[0x40];
+
+	u8         dropped_cmd[0x40];
+};
+
 #endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 1f637f4d1265..6f41270d80c0 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -225,10 +225,20 @@ enum {
 	MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
 };
 
+enum {
+	MLX5_ETH_WQE_SWP_INNER_L3_IPV6  = 1 << 0,
+	MLX5_ETH_WQE_SWP_INNER_L4_UDP   = 1 << 1,
+	MLX5_ETH_WQE_SWP_OUTER_L3_IPV6  = 1 << 4,
+	MLX5_ETH_WQE_SWP_OUTER_L4_UDP   = 1 << 5,
+};
+
 struct mlx5_wqe_eth_seg {
-	u8              rsvd0[4];
+	u8              swp_outer_l4_offset;
+	u8              swp_outer_l3_offset;
+	u8              swp_inner_l4_offset;
+	u8              swp_inner_l3_offset;
 	u8              cs_flags;
-	u8              rsvd1;
+	u8              swp_flags;
 	__be16          mss;
 	__be32          rsvd2;
 	union {
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 996711d8a7b4..41d04e9d088a 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -1,7 +1,6 @@
 #ifndef _NFNETLINK_H
 #define _NFNETLINK_H
 
-
 #include <linux/netlink.h>
 #include <linux/capability.h>
 #include <net/netlink.h>
@@ -10,13 +9,16 @@
 struct nfnl_callback {
 	int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb,
 		    const struct nlmsghdr *nlh,
-		    const struct nlattr * const cda[]);
+		    const struct nlattr * const cda[],
+		    struct netlink_ext_ack *extack);
 	int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
-			const struct nlattr * const cda[]);
+			const struct nlattr * const cda[],
+			struct netlink_ext_ack *extack);
 	int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			  const struct nlmsghdr *nlh,
-			  const struct nlattr * const cda[]);
+			  const struct nlattr * const cda[],
+			  struct netlink_ext_ack *extack);
 	const struct nla_policy *policy;	/* netlink attribute policy */
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index e0cbf17af780..2c2a5514b0df 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -122,8 +122,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS))
 /* Clear the bit in the hook mask that tells if the rule is on a base chain */
 #define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS))
-/* True if the target is not a standard target */
-#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
 static inline bool ebt_invalid_target(int target)
 {
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 1828900c9411..27c0aaa22cb0 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
+#include <linux/refcount.h>
 
 union inet_addr {
 	__u32		all[4];
@@ -34,7 +35,7 @@ struct netpoll {
 };
 
 struct netpoll_info {
-	atomic_t refcnt;
+	refcount_t refcnt;
 
 	struct semaphore dev_lock;
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1d8d70193782..2a9567bb8186 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -372,6 +372,7 @@ struct phy_c45_device_ids {
  * has_fixups: Set to true if this phy has fixups/quirks.
  * suspended: Set to true if this phy has been suspended successfully.
  * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
+ * loopback_enabled: Set true if this phy has been loopbacked successfully.
  * state: state of the PHY for management purposes
  * dev_flags: Device-specific flags used by the PHY driver.
  * link_timeout: The number of timer firings to wait before the
@@ -409,6 +410,7 @@ struct phy_device {
 	bool has_fixups;
 	bool suspended;
 	bool sysfs_links;
+	bool loopback_enabled;
 
 	enum phy_state state;
 
@@ -648,6 +650,7 @@ struct phy_driver {
 	int (*set_tunable)(struct phy_device *dev,
 			    struct ethtool_tunable *tuna,
 			    const void *data);
+	int (*set_loopback)(struct phy_device *dev, bool enable);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),		\
 				      struct phy_driver, mdiodrv)
@@ -793,6 +796,7 @@ void phy_device_remove(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
+int phy_loopback(struct phy_device *phydev, bool enable);
 struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
 			      phy_interface_t interface);
 struct phy_device *phy_find_first(struct mii_bus *bus);
@@ -847,6 +851,7 @@ int genphy_update_link(struct phy_device *phydev);
 int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
+int genphy_loopback(struct phy_device *phydev, bool enable);
 int genphy_soft_reset(struct phy_device *phydev);
 static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h
index a6f9d633f5be..9e75ac8d19be 100644
--- a/include/linux/platform_data/nfcmrvl.h
+++ b/include/linux/platform_data/nfcmrvl.h
@@ -23,7 +23,7 @@ struct nfcmrvl_platform_data {
 	 */
 
 	/* GPIO that is wired to RESET_N signal */
-	unsigned int reset_n_io;
+	int reset_n_io;
 	/* Tell if transport is muxed in HCI one */
 	unsigned int hci_muxed;
 
diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h
deleted file mode 100644
index f6494b347c06..000000000000
--- a/include/linux/platform_data/st-nci.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Driver include for ST NCI NFC chip family.
- *
- * Copyright (C) 2014-2015  STMicroelectronics SAS. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ST_NCI_H_
-#define _ST_NCI_H_
-
-#define ST_NCI_DRIVER_NAME "st_nci"
-
-struct st_nci_nfc_platform_data {
-	unsigned int gpio_reset;
-	unsigned int irq_polarity;
-	bool is_ese_present;
-	bool is_uicc_present;
-};
-
-#endif /* _ST_NCI_H_ */
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index a567cbf8c5b4..39e2a2ac2471 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -38,6 +38,8 @@
 #include <linux/slab.h>
 
 /* dma_addr_t manip */
+#define PTR_LO(x)               ((u32)(((uintptr_t)(x)) & 0xffffffff))
+#define PTR_HI(x)               ((u32)((((uintptr_t)(x)) >> 16) >> 16))
 #define DMA_LO_LE(x)		cpu_to_le32(lower_32_bits(x))
 #define DMA_HI_LE(x)		cpu_to_le32(upper_32_bits(x))
 #define DMA_REGPAIR_LE(x, val)	do { \
@@ -778,7 +780,7 @@ enum protocol_type {
 	PROTOCOLID_ROCE,
 	PROTOCOLID_CORE,
 	PROTOCOLID_ETH,
-	PROTOCOLID_RESERVED4,
+	PROTOCOLID_IWARP,
 	PROTOCOLID_RESERVED5,
 	PROTOCOLID_PREROCE,
 	PROTOCOLID_COMMON,
diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h
new file mode 100644
index 000000000000..b8b3e1cfae90
--- /dev/null
+++ b/include/linux/qed/iwarp_common.h
@@ -0,0 +1,53 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWARP_COMMON__
+#define __IWARP_COMMON__
+#include <linux/qed/rdma_common.h>
+/************************/
+/* IWARP FW CONSTANTS	*/
+/************************/
+
+#define IWARP_ACTIVE_MODE 0
+#define IWARP_PASSIVE_MODE 1
+
+#define IWARP_SHARED_QUEUE_PAGE_SIZE		(0x8000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET   (0x4000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE (0x1000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET   (0x5000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE (0x3000)
+
+#define IWARP_REQ_MAX_INLINE_DATA_SIZE          (128)
+#define IWARP_REQ_MAX_SINGLE_SQ_WQE_SIZE        (176)
+
+#define IWARP_MAX_QPS                           (64 * 1024)
+
+#endif /* __IWARP_COMMON__ */
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 5958b45eb699..dd7a3b86bb9e 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -47,9 +47,10 @@ enum qed_ll2_conn_type {
 	QED_LL2_TYPE_FCOE,
 	QED_LL2_TYPE_ISCSI,
 	QED_LL2_TYPE_TEST,
-	QED_LL2_TYPE_ISCSI_OOO,
+	QED_LL2_TYPE_OOO,
 	QED_LL2_TYPE_RESERVED2,
 	QED_LL2_TYPE_ROCE,
+	QED_LL2_TYPE_IWARP,
 	QED_LL2_TYPE_RESERVED3,
 	MAX_QED_LL2_RX_CONN_TYPE
 };
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index ff9be01b5f53..4dd72ba210f5 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -470,6 +470,101 @@ struct qed_rdma_counters_out_params {
 #define QED_ROCE_TX_HEAD_FAILURE        (1)
 #define QED_ROCE_TX_FRAG_FAILURE        (2)
 
+enum qed_iwarp_event_type {
+	QED_IWARP_EVENT_MPA_REQUEST,	  /* Passive side request received */
+	QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */
+	QED_IWARP_EVENT_ACTIVE_COMPLETE,  /* Active side reply received */
+	QED_IWARP_EVENT_DISCONNECT,
+	QED_IWARP_EVENT_CLOSE,
+	QED_IWARP_EVENT_IRQ_FULL,
+	QED_IWARP_EVENT_RQ_EMPTY,
+	QED_IWARP_EVENT_LLP_TIMEOUT,
+	QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR,
+	QED_IWARP_EVENT_CQ_OVERFLOW,
+	QED_IWARP_EVENT_QP_CATASTROPHIC,
+	QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
+	QED_IWARP_EVENT_LOCAL_ACCESS_ERROR,
+	QED_IWARP_EVENT_REMOTE_OPERATION_ERROR,
+	QED_IWARP_EVENT_TERMINATE_RECEIVED
+};
+
+enum qed_tcp_ip_version {
+	QED_TCP_IPV4,
+	QED_TCP_IPV6,
+};
+
+struct qed_iwarp_cm_info {
+	enum qed_tcp_ip_version ip_version;
+	u32 remote_ip[4];
+	u32 local_ip[4];
+	u16 remote_port;
+	u16 local_port;
+	u16 vlan;
+	u8 ord;
+	u8 ird;
+	u16 private_data_len;
+	const void *private_data;
+};
+
+struct qed_iwarp_cm_event_params {
+	enum qed_iwarp_event_type event;
+	const struct qed_iwarp_cm_info *cm_info;
+	void *ep_context;	/* To be passed to accept call */
+	int status;
+};
+
+typedef int (*iwarp_event_handler) (void *context,
+				    struct qed_iwarp_cm_event_params *event);
+
+struct qed_iwarp_connect_in {
+	iwarp_event_handler event_cb;
+	void *cb_context;
+	struct qed_rdma_qp *qp;
+	struct qed_iwarp_cm_info cm_info;
+	u16 mss;
+	u8 remote_mac_addr[ETH_ALEN];
+	u8 local_mac_addr[ETH_ALEN];
+};
+
+struct qed_iwarp_connect_out {
+	void *ep_context;
+};
+
+struct qed_iwarp_listen_in {
+	iwarp_event_handler event_cb;
+	void *cb_context;	/* passed to event_cb */
+	u32 max_backlog;
+	enum qed_tcp_ip_version ip_version;
+	u32 ip_addr[4];
+	u16 port;
+	u16 vlan;
+};
+
+struct qed_iwarp_listen_out {
+	void *handle;
+};
+
+struct qed_iwarp_accept_in {
+	void *ep_context;
+	void *cb_context;
+	struct qed_rdma_qp *qp;
+	const void *private_data;
+	u16 private_data_len;
+	u8 ord;
+	u8 ird;
+};
+
+struct qed_iwarp_reject_in {
+	void *ep_context;
+	void *cb_context;
+	const void *private_data;
+	u16 private_data_len;
+};
+
+struct qed_iwarp_send_rtr_in {
+	void *ep_context;
+};
+
 struct qed_roce_ll2_header {
 	void *vaddr;
 	dma_addr_t baddr;
@@ -491,6 +586,7 @@ struct qed_roce_ll2_packet {
 
 enum qed_rdma_type {
 	QED_RDMA_TYPE_ROCE,
+	QED_RDMA_TYPE_IWARP
 };
 
 struct qed_dev_rdma_info {
@@ -575,6 +671,24 @@ struct qed_rdma_ops {
 	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
 				  u8 *old_mac_address, u8 *new_mac_address);
 
+	int (*iwarp_connect)(void *rdma_cxt,
+			     struct qed_iwarp_connect_in *iparams,
+			     struct qed_iwarp_connect_out *oparams);
+
+	int (*iwarp_create_listen)(void *rdma_cxt,
+				   struct qed_iwarp_listen_in *iparams,
+				   struct qed_iwarp_listen_out *oparams);
+
+	int (*iwarp_accept)(void *rdma_cxt,
+			    struct qed_iwarp_accept_in *iparams);
+
+	int (*iwarp_reject)(void *rdma_cxt,
+			    struct qed_iwarp_reject_in *iparams);
+
+	int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle);
+
+	int (*iwarp_send_rtr)(void *rdma_cxt,
+			      struct qed_iwarp_send_rtr_in *iparams);
 };
 
 const struct qed_rdma_ops *qed_get_rdma_ops(void);
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 7a4804c4a593..99e866487e2f 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -57,12 +57,12 @@
 #include <uapi/linux/sctp.h>
 
 /* Section 3.1.  SCTP Common Header Format */
-typedef struct sctphdr {
+struct sctphdr {
 	__be16 source;
 	__be16 dest;
 	__be32 vtag;
 	__le32 checksum;
-} sctp_sctphdr_t;
+};
 
 static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 {
@@ -70,11 +70,11 @@ static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 }
 
 /* Section 3.2.  Chunk Field Descriptions. */
-typedef struct sctp_chunkhdr {
+struct sctp_chunkhdr {
 	__u8 type;
 	__u8 flags;
 	__be16 length;
-} sctp_chunkhdr_t;
+};
 
 
 /* Section 3.2.  Chunk Type Values.
@@ -82,7 +82,7 @@ typedef struct sctp_chunkhdr {
  * Value field. It takes a value from 0 to 254. The value of 255 is
  * reserved for future use as an extension field.
  */
-typedef enum {
+enum sctp_cid {
 	SCTP_CID_DATA			= 0,
         SCTP_CID_INIT			= 1,
         SCTP_CID_INIT_ACK		= 2,
@@ -109,7 +109,7 @@ typedef enum {
 	SCTP_CID_ASCONF			= 0xC1,
 	SCTP_CID_ASCONF_ACK		= 0x80,
 	SCTP_CID_RECONF			= 0x82,
-} sctp_cid_t; /* enum */
+}; /* enum */
 
 
 /* Section 3.2
@@ -117,12 +117,12 @@ typedef enum {
  *  the action that must be taken if the processing endpoint does not
  *  recognize the Chunk Type.
  */
-typedef enum {
+enum {
 	SCTP_CID_ACTION_DISCARD     = 0x00,
 	SCTP_CID_ACTION_DISCARD_ERR = 0x40,
 	SCTP_CID_ACTION_SKIP        = 0x80,
 	SCTP_CID_ACTION_SKIP_ERR    = 0xc0,
-} sctp_cid_action_t;
+};
 
 enum { SCTP_CID_ACTION_MASK = 0xc0, };
 
@@ -162,12 +162,12 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
  * Section 3.2.1 Optional/Variable-length Parmaeter Format.
  */
 
-typedef struct sctp_paramhdr {
+struct sctp_paramhdr {
 	__be16 type;
 	__be16 length;
-} sctp_paramhdr_t;
+};
 
-typedef enum {
+enum sctp_param {
 
 	/* RFC 2960 Section 3.3.5 */
 	SCTP_PARAM_HEARTBEAT_INFO		= cpu_to_be16(1),
@@ -207,7 +207,7 @@ typedef enum {
 	SCTP_PARAM_RESET_RESPONSE		= cpu_to_be16(0x0010),
 	SCTP_PARAM_RESET_ADD_OUT_STREAMS	= cpu_to_be16(0x0011),
 	SCTP_PARAM_RESET_ADD_IN_STREAMS		= cpu_to_be16(0x0012),
-} sctp_param_t; /* enum */
+}; /* enum */
 
 
 /* RFC 2960 Section 3.2.1
@@ -216,29 +216,29 @@ typedef enum {
  *  not recognize the Parameter Type.
  *
  */
-typedef enum {
+enum {
 	SCTP_PARAM_ACTION_DISCARD     = cpu_to_be16(0x0000),
 	SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000),
 	SCTP_PARAM_ACTION_SKIP        = cpu_to_be16(0x8000),
 	SCTP_PARAM_ACTION_SKIP_ERR    = cpu_to_be16(0xc000),
-} sctp_param_action_t;
+};
 
 enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), };
 
 /* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */
 
-typedef struct sctp_datahdr {
+struct sctp_datahdr {
 	__be32 tsn;
 	__be16 stream;
 	__be16 ssn;
 	__be32 ppid;
 	__u8  payload[0];
-} sctp_datahdr_t;
+};
 
-typedef struct sctp_data_chunk {
-        sctp_chunkhdr_t chunk_hdr;
-        sctp_datahdr_t  data_hdr;
-} sctp_data_chunk_t;
+struct sctp_data_chunk {
+	struct sctp_chunkhdr chunk_hdr;
+	struct sctp_datahdr data_hdr;
+};
 
 /* DATA Chuck Specific Flags */
 enum {
@@ -257,54 +257,54 @@ enum { SCTP_DATA_FRAG_MASK = 0x03, };
  *  This chunk is used to initiate a SCTP association between two
  *  endpoints.
  */
-typedef struct sctp_inithdr {
+struct sctp_inithdr {
 	__be32 init_tag;
 	__be32 a_rwnd;
 	__be16 num_outbound_streams;
 	__be16 num_inbound_streams;
 	__be32 initial_tsn;
 	__u8  params[0];
-} sctp_inithdr_t;
+};
 
-typedef struct sctp_init_chunk {
-	sctp_chunkhdr_t chunk_hdr;
-	sctp_inithdr_t init_hdr;
-} sctp_init_chunk_t;
+struct sctp_init_chunk {
+	struct sctp_chunkhdr chunk_hdr;
+	struct sctp_inithdr init_hdr;
+};
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
 typedef struct sctp_ipv4addr_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	struct in_addr  addr;
 } sctp_ipv4addr_param_t;
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
 typedef struct sctp_ipv6addr_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	struct in6_addr addr;
 } sctp_ipv6addr_param_t;
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
 typedef struct sctp_cookie_preserve_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__be32          lifespan_increment;
 } sctp_cookie_preserve_param_t;
 
 /* Section 3.3.2.1 Host Name Address (11) */
 typedef struct sctp_hostname_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	uint8_t hostname[0];
 } sctp_hostname_param_t;
 
 /* Section 3.3.2.1 Supported Address Types (12) */
 typedef struct sctp_supported_addrs_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__be16 types[0];
 } sctp_supported_addrs_param_t;
 
 /* Appendix A. ECN Capable (32768) */
 typedef struct sctp_ecn_capable_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 } sctp_ecn_capable_param_t;
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
@@ -321,19 +321,19 @@ typedef struct sctp_supported_ext_param {
 
 /* AUTH Section 3.1 Random */
 typedef struct sctp_random_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u8 random_val[0];
 } sctp_random_param_t;
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
 } sctp_chunks_param_t;
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__be16 hmac_ids[0];
 } sctp_hmac_algo_param_t;
 
@@ -341,18 +341,18 @@ typedef struct sctp_hmac_algo_param {
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
  *   association.
  */
-typedef sctp_init_chunk_t sctp_initack_chunk_t;
+typedef struct sctp_init_chunk sctp_initack_chunk_t;
 
 /* Section 3.3.3.1 State Cookie (7) */
 typedef struct sctp_cookie_param {
-	sctp_paramhdr_t p;
+	struct sctp_paramhdr p;
 	__u8 body[0];
 } sctp_cookie_param_t;
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
 typedef struct sctp_unrecognized_param {
-	sctp_paramhdr_t param_hdr;
-	sctp_paramhdr_t unrecognized;
+	struct sctp_paramhdr param_hdr;
+	struct sctp_paramhdr unrecognized;
 } sctp_unrecognized_param_t;
 
 
@@ -386,7 +386,7 @@ typedef struct sctp_sackhdr {
 } sctp_sackhdr_t;
 
 typedef struct sctp_sack_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_sackhdr_t sack_hdr;
 } sctp_sack_chunk_t;
 
@@ -399,11 +399,11 @@ typedef struct sctp_sack_chunk {
  */
 
 typedef struct sctp_heartbeathdr {
-	sctp_paramhdr_t info;
+	struct sctp_paramhdr info;
 } sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_heartbeathdr_t hb_hdr;
 } sctp_heartbeat_chunk_t;
 
@@ -413,7 +413,7 @@ typedef struct sctp_heartbeat_chunk {
  * chunk descriptor.
  */
 typedef struct sctp_abort_chunk {
-        sctp_chunkhdr_t uh;
+	struct sctp_chunkhdr uh;
 } sctp_abort_chunk_t;
 
 
@@ -425,8 +425,8 @@ typedef struct sctp_shutdownhdr {
 } sctp_shutdownhdr_t;
 
 struct sctp_shutdown_chunk_t {
-        sctp_chunkhdr_t    chunk_hdr;
-        sctp_shutdownhdr_t shutdown_hdr;
+	struct sctp_chunkhdr chunk_hdr;
+	sctp_shutdownhdr_t shutdown_hdr;
 };
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
@@ -438,8 +438,8 @@ typedef struct sctp_errhdr {
 } sctp_errhdr_t;
 
 typedef struct sctp_operr_chunk {
-        sctp_chunkhdr_t chunk_hdr;
-	sctp_errhdr_t   err_hdr;
+	struct sctp_chunkhdr chunk_hdr;
+	sctp_errhdr_t err_hdr;
 } sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
@@ -528,7 +528,7 @@ typedef struct sctp_ecnehdr {
 } sctp_ecnehdr_t;
 
 typedef struct sctp_ecne_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_ecnehdr_t ence_hdr;
 } sctp_ecne_chunk_t;
 
@@ -540,7 +540,7 @@ typedef struct sctp_cwrhdr {
 } sctp_cwrhdr_t;
 
 typedef struct sctp_cwr_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_cwrhdr_t cwr_hdr;
 } sctp_cwr_chunk_t;
 
@@ -639,7 +639,7 @@ struct sctp_fwdtsn_chunk {
  *	report status of ASCONF processing.
  */
 typedef struct sctp_addip_param {
-	sctp_paramhdr_t	param_hdr;
+	struct sctp_paramhdr	param_hdr;
 	__be32		crr_id;
 } sctp_addip_param_t;
 
@@ -649,7 +649,7 @@ typedef struct sctp_addiphdr {
 } sctp_addiphdr_t;
 
 typedef struct sctp_addip_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_addiphdr_t addip_hdr;
 } sctp_addip_chunk_t;
 
@@ -709,7 +709,7 @@ typedef struct sctp_authhdr {
 } sctp_authhdr_t;
 
 typedef struct sctp_auth_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	sctp_authhdr_t auth_hdr;
 } sctp_auth_chunk_t;
 
@@ -719,12 +719,12 @@ struct sctp_infox {
 };
 
 struct sctp_reconf_chunk {
-	sctp_chunkhdr_t chunk_hdr;
+	struct sctp_chunkhdr chunk_hdr;
 	__u8 params[0];
 };
 
 struct sctp_strreset_outreq {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 	__u32 response_seq;
 	__u32 send_reset_at_tsn;
@@ -732,18 +732,18 @@ struct sctp_strreset_outreq {
 };
 
 struct sctp_strreset_inreq {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 	__u16 list_of_streams[0];
 };
 
 struct sctp_strreset_tsnreq {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 };
 
 struct sctp_strreset_addstrm {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 request_seq;
 	__u16 number_of_streams;
 	__u16 reserved;
@@ -760,13 +760,13 @@ enum {
 };
 
 struct sctp_strreset_resp {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 response_seq;
 	__u32 result;
 };
 
 struct sctp_strreset_resptsn {
-	sctp_paramhdr_t param_hdr;
+	struct sctp_paramhdr param_hdr;
 	__u32 response_seq;
 	__u32 result;
 	__u32 senders_next_tsn;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a17e235639ae..3d3ceaac13b1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -252,7 +252,7 @@ struct nf_conntrack {
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 struct nf_bridge_info {
-	atomic_t		use;
+	refcount_t		use;
 	enum {
 		BRNF_PROTO_UNCHANGED,
 		BRNF_PROTO_8021Q,
@@ -761,7 +761,7 @@ struct sk_buff {
 	unsigned char		*head,
 				*data;
 	unsigned int		truesize;
-	atomic_t		users;
+	refcount_t		users;
 };
 
 #ifdef __KERNEL__
@@ -872,9 +872,9 @@ static inline bool skb_unref(struct sk_buff *skb)
 {
 	if (unlikely(!skb))
 		return false;
-	if (likely(atomic_read(&skb->users) == 1))
+	if (likely(refcount_read(&skb->users) == 1))
 		smp_rmb();
-	else if (likely(!atomic_dec_and_test(&skb->users)))
+	else if (likely(!refcount_dec_and_test(&skb->users)))
 		return false;
 
 	return true;
@@ -915,7 +915,7 @@ struct sk_buff_fclones {
 
 	struct sk_buff	skb2;
 
-	atomic_t	fclone_ref;
+	refcount_t	fclone_ref;
 };
 
 /**
@@ -935,7 +935,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
 	fclones = container_of(skb, struct sk_buff_fclones, skb1);
 
 	return skb->fclone == SKB_FCLONE_ORIG &&
-	       atomic_read(&fclones->fclone_ref) > 1 &&
+	       refcount_read(&fclones->fclone_ref) > 1 &&
 	       fclones->skb2.sk == sk;
 }
 
@@ -1283,7 +1283,7 @@ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
  */
 static inline struct sk_buff *skb_get(struct sk_buff *skb)
 {
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	return skb;
 }
 
@@ -1384,7 +1384,7 @@ static inline void __skb_header_release(struct sk_buff *skb)
  */
 static inline int skb_shared(const struct sk_buff *skb)
 {
-	return atomic_read(&skb->users) != 1;
+	return refcount_read(&skb->users) != 1;
 }
 
 /**
@@ -2206,6 +2206,11 @@ static inline int skb_mac_offset(const struct sk_buff *skb)
 	return skb_mac_header(skb) - skb->data;
 }
 
+static inline u32 skb_mac_header_len(const struct sk_buff *skb)
+{
+	return skb->network_header - skb->mac_header;
+}
+
 static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 {
 	return skb->mac_header != (typeof(skb->mac_header))~0U;
@@ -3589,13 +3594,13 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 {
-	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
+	if (nf_bridge && refcount_dec_and_test(&nf_bridge->use))
 		kfree(nf_bridge);
 }
 static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
 {
 	if (nf_bridge)
-		atomic_inc(&nf_bridge->use);
+		refcount_inc(&nf_bridge->use);
 }
 #endif /* CONFIG_BRIDGE_NETFILTER */
 static inline void nf_reset(struct sk_buff *skb)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..93315d6b21a8 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
 	int red_left_pad;	/* Left redzone padding size */
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
+	struct work_struct kobj_remove_work;
 #endif
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f4532188c..f7043ccca81c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
  *			interval.
  * @xtime_remainder:	Shifted nano seconds left over when rounding
  *			@cycle_interval
- * @raw_interval:	Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:		Difference between accumulated time and NTP time in ntp
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
-	u32			raw_interval;
+	u64			raw_interval;
 	/* The ntp_tick_length() value currently being used.
 	 * This cached copy ensures we consistently apply the tick
 	 * length for an entire tick, as ntp_tick_length may change
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 00d232406f18..021f7a88f52c 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -117,6 +117,9 @@ struct cdc_ncm_ctx {
 	u32 tx_curr_frame_num;
 	u32 rx_max;
 	u32 tx_max;
+	u32 tx_curr_size;
+	u32 tx_low_mem_max_cnt;
+	u32 tx_low_mem_val;
 	u32 max_datagram_size;
 	u16 tx_max_datagrams;
 	u16 tx_remainder;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60eccb59a6..3a385e4767f0 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -4,6 +4,7 @@
 #include <linux/socket.h>
 #include <linux/un.h>
 #include <linux/mutex.h>
+#include <linux/refcount.h>
 #include <net/sock.h>
 
 void unix_inflight(struct user_struct *user, struct file *fp);
@@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock;
 extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 
 struct unix_address {
-	atomic_t	refcnt;
+	refcount_t	refcnt;
 	int		len;
 	unsigned int	hash;
 	struct sockaddr_un name[0];
diff --git a/include/net/arp.h b/include/net/arp.h
index 65619a2de6f4..17d90e4e8dc5 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -28,7 +28,7 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
 
 	rcu_read_lock_bh();
 	n = __ipv4_neigh_lookup_noref(dev, key);
-	if (n && !atomic_inc_not_zero(&n->refcnt))
+	if (n && !refcount_inc_not_zero(&n->refcnt))
 		n = NULL;
 	rcu_read_unlock_bh();
 
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 76c7300626d6..c487bfa2f479 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -5,6 +5,7 @@
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/fib_rules.h>
+#include <linux/refcount.h>
 #include <net/flow.h>
 #include <net/rtnetlink.h>
 
@@ -29,7 +30,7 @@ struct fib_rule {
 	struct fib_rule __rcu	*ctarget;
 	struct net		*fr_net;
 
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	u32			pref;
 	int			suppress_ifgroup;
 	int			suppress_prefixlen;
@@ -103,12 +104,12 @@ struct fib_rules_ops {
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
-	atomic_inc(&rule->refcnt);
+	refcount_inc(&rule->refcnt);
 }
 
 static inline void fib_rule_put(struct fib_rule *rule)
 {
-	if (atomic_dec_and_test(&rule->refcnt))
+	if (refcount_dec_and_test(&rule->refcnt))
 		kfree_rcu(rule, rcu);
 }
 
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 975779d0e7b0..440c1e9d0623 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -50,7 +50,7 @@ struct inet_frag_queue {
 	spinlock_t		lock;
 	struct timer_list	timer;
 	struct hlist_node	list;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	struct sk_buff		*fragments;
 	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
@@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 
 static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
 {
-	if (atomic_dec_and_test(&q->refcnt))
+	if (refcount_dec_and_test(&q->refcnt))
 		inet_frag_destroy(q, f);
 }
 
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 1178931288cb..5026b1f08bb8 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -32,7 +32,7 @@
 #include <net/tcp_states.h>
 #include <net/netns/hash.h>
 
-#include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <asm/byteorder.h>
 
 /* This is for all connections with a full identity, no wildcards.
@@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
 	sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
 			   dport, dif, &refcounted);
 
-	if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
 }
@@ -359,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 			     refcounted);
 }
 
-u32 sk_ehashfn(const struct sock *sk);
 u32 inet6_ehashfn(const struct net *net,
 		  const struct in6_addr *laddr, const u16 lport,
 		  const struct in6_addr *faddr, const __be16 fport);
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 235c7811a86a..f2a215fc78e4 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -46,7 +46,7 @@ struct inet_peer {
 		struct rcu_head     gc_rcu;
 	};
 	/*
-	 * Once inet_peer is queued for deletion (refcnt == -1), following field
+	 * Once inet_peer is queued for deletion (refcnt == 0), following field
 	 * is not available: rid
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
@@ -60,7 +60,7 @@ struct inet_peer {
 
 	/* following fields might be frequently dirtied */
 	__u32			dtime;	/* the time of last use of not referenced entries */
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 };
 
 struct inet_peer_base {
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 1036c902d2c9..31b1bb11ba3f 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -384,7 +384,7 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
 
 	rcu_read_lock_bh();
 	n = __ipv6_neigh_lookup_noref(dev, pkey);
-	if (n && !atomic_inc_not_zero(&n->refcnt))
+	if (n && !refcount_inc_not_zero(&n->refcnt))
 		n = NULL;
 	rcu_read_unlock_bh();
 
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 639b67564a7d..afc39e3a3f7c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -17,6 +17,7 @@
  */
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rcupdate.h>
@@ -76,7 +77,7 @@ struct neigh_parms {
 	void	*sysctl_table;
 
 	int dead;
-	atomic_t refcnt;
+	refcount_t refcnt;
 	struct rcu_head rcu_head;
 
 	int	reachable_time;
@@ -137,7 +138,7 @@ struct neighbour {
 	unsigned long		confirmed;
 	unsigned long		updated;
 	rwlock_t		lock;
-	atomic_t		refcnt;
+	refcount_t		refcnt;
 	struct sk_buff_head	arp_queue;
 	unsigned int		arp_queue_len_bytes;
 	struct timer_list	timer;
@@ -395,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p);
 
 static inline void __neigh_parms_put(struct neigh_parms *parms)
 {
-	atomic_dec(&parms->refcnt);
+	refcount_dec(&parms->refcnt);
 }
 
 static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
 {
-	atomic_inc(&parms->refcnt);
+	refcount_inc(&parms->refcnt);
 	return parms;
 }
 
@@ -410,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
 
 static inline void neigh_release(struct neighbour *neigh)
 {
-	if (atomic_dec_and_test(&neigh->refcnt))
+	if (refcount_dec_and_test(&neigh->refcnt))
 		neigh_destroy(neigh);
 }
 
 static inline struct neighbour * neigh_clone(struct neighbour *neigh)
 {
 	if (neigh)
-		atomic_inc(&neigh->refcnt);
+		refcount_inc(&neigh->refcnt);
 	return neigh;
 }
 
-#define neigh_hold(n)	atomic_inc(&(n)->refcnt)
+#define neigh_hold(n)	refcount_inc(&(n)->refcnt)
 
 static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fe80bb48ab1f..31a2b51bef2c 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -5,6 +5,7 @@
 #define __NET_NET_NAMESPACE_H
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/sysctl.h>
@@ -46,7 +47,7 @@ struct netns_ipvs;
 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 
 struct net {
-	atomic_t		passive;	/* To decided when the network
+	refcount_t		passive;	/* To decided when the network
 						 * namespace should be freed.
 						 */
 	atomic_t		count;		/* To decided when the network
@@ -158,6 +159,7 @@ extern struct net init_net;
 struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 			struct net *old_net);
 
+void net_ns_barrier(void);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
@@ -168,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags,
 		return ERR_PTR(-EINVAL);
 	return old_net;
 }
+
+static inline void net_ns_barrier(void) {}
 #endif /* CONFIG_NET_NS */
 
 
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 0b0c35c37125..925524ede6c8 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 	skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
 
 	if (likely(skb->nf_bridge))
-		atomic_set(&(skb->nf_bridge->use), 1);
+		refcount_set(&(skb->nf_bridge->use), 1);
 
 	return skb->nf_bridge;
 }
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8ece3612d0cd..48407569585d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -225,9 +225,13 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
-void nf_ct_iterate_cleanup(struct net *net,
-			   int (*iter)(struct nf_conn *i, void *data),
-			   void *data, u32 portid, int report);
+void nf_ct_iterate_cleanup_net(struct net *net,
+			       int (*iter)(struct nf_conn *i, void *data),
+			       void *data, u32 portid, int report);
+
+/* also set unconfirmed conntracks as dying. Only use in module exit path. */
+void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data),
+			   void *data);
 
 struct nf_conntrack_zone;
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index e01559b4d781..6d14b36e3a49 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -71,7 +71,7 @@ struct nf_conntrack_l3proto {
 	struct module *me;
 };
 
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
 
 #ifdef CONFIG_SYSCTL
 /* Protocol pernet registration. */
@@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
 static inline struct nf_conntrack_l3proto *
 __nf_ct_l3proto_find(u_int16_t l3proto)
 {
-	if (unlikely(l3proto >= AF_MAX))
+	if (unlikely(l3proto >= NFPROTO_NUMPROTO))
 		return &nf_conntrack_l3proto_generic;
 	return rcu_dereference(nf_ct_l3protos[l3proto]);
 }
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8a8bab8d7b15..bd5be0d691d5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -281,6 +281,23 @@ struct nft_set_estimate {
 	enum nft_set_class	space;
 };
 
+/**
+ *      struct nft_set_type - nf_tables set type
+ *
+ *      @select_ops: function to select nft_set_ops
+ *      @ops: default ops, used when no select_ops functions is present
+ *      @list: used internally
+ *      @owner: module reference
+ */
+struct nft_set_type {
+	const struct nft_set_ops	*(*select_ops)(const struct nft_ctx *,
+						       const struct nft_set_desc *desc,
+						       u32 flags);
+	const struct nft_set_ops	*ops;
+	struct list_head		list;
+	struct module			*owner;
+};
+
 struct nft_set_ext;
 struct nft_expr;
 
@@ -297,8 +314,6 @@ struct nft_expr;
  *	@privsize: function to return size of set private data
  *	@init: initialize private data of new set instance
  *	@destroy: destroy private data of set instance
- *	@list: nf_tables_set_ops list node
- *	@owner: module reference
  *	@elemsize: element private size
  *	@features: features supported by the implementation
  */
@@ -336,7 +351,8 @@ struct nft_set_ops {
 						struct nft_set *set,
 						struct nft_set_iter *iter);
 
-	unsigned int			(*privsize)(const struct nlattr * const nla[]);
+	unsigned int			(*privsize)(const struct nlattr * const nla[],
+						    const struct nft_set_desc *desc);
 	bool				(*estimate)(const struct nft_set_desc *desc,
 						    u32 features,
 						    struct nft_set_estimate *est);
@@ -345,14 +361,13 @@ struct nft_set_ops {
 						const struct nlattr * const nla[]);
 	void				(*destroy)(const struct nft_set *set);
 
-	struct list_head		list;
-	struct module			*owner;
 	unsigned int			elemsize;
 	u32				features;
+	const struct nft_set_type	*type;
 };
 
-int nft_register_set(struct nft_set_ops *ops);
-void nft_unregister_set(struct nft_set_ops *ops);
+int nft_register_set(struct nft_set_type *type);
+void nft_unregister_set(struct nft_set_type *type);
 
 /**
  * 	struct nft_set - nf_tables set instance
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index efe98068880f..72d6435fc16c 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -37,7 +37,7 @@
 #include <linux/in6.h>
 #include <net/netlink.h>
 #include <net/request_sock.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
 
 struct cipso_v4_doi;
 struct calipso_doi;
@@ -136,7 +136,7 @@ struct netlbl_audit {
  *
  */
 struct netlbl_lsm_cache {
-	atomic_t refcount;
+	refcount_t refcount;
 	void (*free) (const void *data);
 	void *data;
 };
@@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
 
 	cache = kzalloc(sizeof(*cache), flags);
 	if (cache)
-		atomic_set(&cache->refcount, 1);
+		refcount_set(&cache->refcount, 1);
 	return cache;
 }
 
@@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
  */
 static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache)
 {
-	if (!atomic_dec_and_test(&cache->refcount))
+	if (!refcount_dec_and_test(&cache->refcount))
 		return;
 
 	if (cache->free)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 53ced67c4ae9..23e22054aa60 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/bug.h>
+#include <linux/refcount.h>
 
 #include <net/sock.h>
 
@@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
 		return NULL;
 	req->rsk_listener = NULL;
 	if (attach_listener) {
-		if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
+		if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
 			kmem_cache_free(ops->slab, req);
 			return NULL;
 		}
@@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
 	sk_node_init(&req_to_sk(req)->sk_node);
 	sk_tx_queue_clear(req_to_sk(req));
 	req->saved_syn = NULL;
-	atomic_set(&req->rsk_refcnt, 0);
+	refcount_set(&req->rsk_refcnt, 0);
 
 	return req;
 }
@@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
 static inline void reqsk_free(struct request_sock *req)
 {
 	/* temporary debugging */
-	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
+	WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
 
 	req->rsk_ops->destructor(req);
 	if (req->rsk_listener)
@@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req)
 
 static inline void reqsk_put(struct request_sock *req)
 {
-	if (atomic_dec_and_test(&req->rsk_refcnt))
+	if (refcount_dec_and_test(&req->rsk_refcnt))
 		reqsk_free(req);
 }
 
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index 9b9fb122b31f..171244bd856f 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -97,8 +97,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
 				     struct sctp_hmac_algo_param *hmacs);
 int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
 				    __be16 hmac_id);
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
+int sctp_auth_send_cid(enum sctp_cid chunk,
+		       const struct sctp_association *asoc);
+int sctp_auth_recv_cid(enum sctp_cid chunk,
+		       const struct sctp_association *asoc);
 void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
 			    struct sk_buff *skb,
 			    struct sctp_auth_chunk *auth, gfp_t gfp);
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index d4a20d00461c..d4679e7a5ed5 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -132,7 +132,7 @@ typedef union {
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
 	struct sctp_bind_addr *bp;
-	sctp_init_chunk_t *init;
+	struct sctp_init_chunk *init;
 	struct sctp_ulpevent *ulpevent;
 	struct sctp_packet *packet;
 	sctp_sackhdr_t *sackh;
@@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK,	struct sctp_chunk *, chunk)
 SCTP_ARG_CONSTRUCTOR(ASOC,	struct sctp_association *, asoc)
 SCTP_ARG_CONSTRUCTOR(TRANSPORT,	struct sctp_transport *, transport)
 SCTP_ARG_CONSTRUCTOR(BA,	struct sctp_bind_addr *, bp)
-SCTP_ARG_CONSTRUCTOR(PEER_INIT,	sctp_init_chunk_t *, init)
+SCTP_ARG_CONSTRUCTOR(PEER_INIT,	struct sctp_init_chunk *, init)
 SCTP_ARG_CONSTRUCTOR(ULPEVENT,  struct sctp_ulpevent *, ulpevent)
 SCTP_ARG_CONSTRUCTOR(PACKET,	struct sctp_packet *, packet)
 SCTP_ARG_CONSTRUCTOR(SACKH,	sctp_sackhdr_t *, sackh)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index b07a745ab69f..9b18044c551e 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -130,7 +130,7 @@ typedef enum {
  */
 
 typedef union {
-	sctp_cid_t chunk;
+	enum sctp_cid chunk;
 	sctp_event_timeout_t timeout;
 	sctp_event_other_t other;
 	sctp_event_primitive_t primitive;
@@ -141,7 +141,7 @@ static inline sctp_subtype_t	\
 SCTP_ST_## _name (_type _arg)		\
 { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
 
-SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		sctp_cid_t,		chunk)
+SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
 SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	sctp_event_timeout_t,	timeout)
 SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		sctp_event_other_t,	other)
 SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
@@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
 /* Calculate the actual data size in a data chunk */
 #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
 		       		- (unsigned long)(c->chunk_hdr)\
-				- sizeof(sctp_data_chunk_t)))
+				- sizeof(struct sctp_data_chunk)))
 
 /* Internal error codes */
 typedef enum {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 069582ee5d7f..a9519a06a23b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 #define _sctp_walk_params(pos, chunk, end, member)\
 for (pos.v = chunk->member;\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
-     ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+     ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
      pos.v += SCTP_PAD4(ntohs(pos.p->length)))
 
 #define sctp_walk_errors(err, chunk_hdr)\
@@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 
 #define _sctp_walk_errors(err, chunk_hdr, end)\
 for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
-	    sizeof(sctp_chunkhdr_t));\
+	    sizeof(struct sctp_chunkhdr));\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
      err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 47113f2c4b0a..860f378333b5 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer);
 void sctp_generate_reconf_event(unsigned long peer);
 void sctp_generate_proto_unreach_event(unsigned long peer);
 
-void sctp_ootb_pkt_free(struct sctp_packet *);
+void sctp_ootb_pkt_free(struct sctp_packet *packet);
 
-struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *,
-				       const struct sctp_association *,
-				       struct sctp_chunk *,
+struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       struct sctp_chunk *chunk,
 				       gfp_t gfp, int *err,
 				       struct sctp_chunk **err_chk_p);
-int sctp_addip_addr_config(struct sctp_association *, sctp_param_t,
-			   struct sockaddr_storage*, int);
 
 /* 3rd level prototypes */
-__u32 sctp_generate_tag(const struct sctp_endpoint *);
-__u32 sctp_generate_tsn(const struct sctp_endpoint *);
+__u32 sctp_generate_tag(const struct sctp_endpoint *ep);
+__u32 sctp_generate_tsn(const struct sctp_endpoint *ep);
 
 /* Extern declarations for major data structures.  */
 extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES];
@@ -349,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
 	__u16 size;
 
 	size = ntohs(chunk->chunk_hdr->length);
-	size -= sizeof(sctp_data_chunk_t);
+	size -= sizeof(struct sctp_data_chunk);
 
 	return size;
 }
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e26763bfabd6..07c11fefa8c4 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -310,9 +310,10 @@ struct sctp_cookie {
 
 	__u32 adaptation_ind;
 
-	__u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH];
+	__u8 auth_random[sizeof(struct sctp_paramhdr) +
+			 SCTP_AUTH_RANDOM_LENGTH];
 	__u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2];
-	__u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS];
+	__u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS];
 
 	/* This is a shim for my peer's INIT packet, followed by
 	 * a copy of the raw address list of the association.
@@ -1297,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr,
 
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
 		     const struct sctp_association *asoc,
-		     sctp_cid_t, sctp_init_chunk_t *peer_init,
+		     enum sctp_cid cid, struct sctp_init_chunk *peer_init,
 		     struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
 int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer,
-		      sctp_init_chunk_t *init, gfp_t gfp);
+		      struct sctp_init_chunk *init, gfp_t gfp);
 __u32 sctp_generate_tag(const struct sctp_endpoint *);
 __u32 sctp_generate_tsn(const struct sctp_endpoint *);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 00d09140e354..60200f4f4028 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -66,6 +66,7 @@
 #include <linux/poll.h>
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <net/dst.h>
 #include <net/checksum.h>
 #include <net/tcp_states.h>
@@ -219,7 +220,7 @@ struct sock_common {
 		u32		skc_tw_rcv_nxt; /* struct tcp_timewait_sock  */
 	};
 
-	atomic_t		skc_refcnt;
+	refcount_t		skc_refcnt;
 	/* private: */
 	int                     skc_dontcopy_end[0];
 	union {
@@ -390,7 +391,7 @@ struct sock {
 
 	/* ===== cache line for TX ===== */
 	int			sk_wmem_queued;
-	atomic_t		sk_wmem_alloc;
+	refcount_t		sk_wmem_alloc;
 	unsigned long		sk_tsq_flags;
 	struct sk_buff		*sk_send_head;
 	struct sk_buff_head	sk_write_queue;
@@ -611,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk)
 
 static __always_inline void sock_hold(struct sock *sk)
 {
-	atomic_inc(&sk->sk_refcnt);
+	refcount_inc(&sk->sk_refcnt);
 }
 
 /* Ungrab socket in the context, which assumes that socket refcnt
@@ -619,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk)
  */
 static __always_inline void __sock_put(struct sock *sk)
 {
-	atomic_dec(&sk->sk_refcnt);
+	refcount_dec(&sk->sk_refcnt);
 }
 
 static inline bool sk_del_node_init(struct sock *sk)
@@ -628,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk)
 
 	if (rc) {
 		/* paranoid for a while -acme */
-		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
 	return rc;
@@ -650,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
 
 	if (rc) {
 		/* paranoid for a while -acme */
-		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
 	return rc;
@@ -1144,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
 
 static inline void sk_refcnt_debug_release(const struct sock *sk)
 {
-	if (atomic_read(&sk->sk_refcnt) != 1)
+	if (refcount_read(&sk->sk_refcnt) != 1)
 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
-		       sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
+		       sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt));
 }
 #else /* SOCK_REFCNT_DEBUG */
 #define sk_refcnt_debug_inc(sk) do { } while (0)
@@ -1636,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk);
 /* Ungrab socket and destroy it, if it was the last reference. */
 static inline void sock_put(struct sock *sk)
 {
-	if (atomic_dec_and_test(&sk->sk_refcnt))
+	if (refcount_dec_and_test(&sk->sk_refcnt))
 		sk_free(sk);
 }
 /* Generic version of sock_put(), dealing with all sockets
@@ -1911,7 +1912,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
  */
 static inline int sk_wmem_alloc_get(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) - 1;
+	return refcount_read(&sk->sk_wmem_alloc) - 1;
 }
 
 /**
@@ -2055,7 +2056,7 @@ static inline unsigned long sock_wspace(struct sock *sk)
 	int amt = 0;
 
 	if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-		amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
 		if (amt < 0)
 			amt = 0;
 	}
@@ -2136,7 +2137,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
  */
 static inline bool sock_writeable(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+	return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
 }
 
 static inline gfp_t gfp_any(void)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index c784a6ac6ef1..8ae9e3b6392e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -217,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 
 bool switchdev_port_same_parent_id(struct net_device *a,
 				   struct net_device *b);
+
+#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops))
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -322,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a,
 	return false;
 }
 
+#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0)
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d0751b79d99c..70483296157f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
 
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf-cgroup.h>
+
 extern struct inet_hashinfo tcp_hashinfo;
 
 extern struct percpu_counter tcp_orphan_count;
@@ -1000,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
 void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
+void tcp_reinit_congestion_control(struct sock *sk,
+				   const struct tcp_congestion_ops *ca);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
@@ -2021,4 +2027,62 @@ int tcp_set_ulp(struct sock *sk, const char *name);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
+/* Call BPF_SOCK_OPS program that returns an int. If the return value
+ * is < 0, then the BPF op failed (for example if the loaded BPF
+ * program does not support the chosen operation or there is no BPF
+ * program loaded).
+ */
+#ifdef CONFIG_BPF
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+	struct bpf_sock_ops_kern sock_ops;
+	int ret;
+
+	if (sk_fullsock(sk))
+		sock_owned_by_me(sk);
+
+	memset(&sock_ops, 0, sizeof(sock_ops));
+	sock_ops.sk = sk;
+	sock_ops.op = op;
+
+	ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+	if (ret == 0)
+		ret = sock_ops.reply;
+	else
+		ret = -1;
+	return ret;
+}
+#else
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+	return -EPERM;
+}
+#endif
+
+static inline u32 tcp_timeout_init(struct sock *sk)
+{
+	int timeout;
+
+	timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+
+	if (timeout <= 0)
+		timeout = TCP_TIMEOUT_INIT;
+	return timeout;
+}
+
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
+{
+	int rwnd;
+
+	rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+
+	if (rwnd < 0)
+		rwnd = 0;
+	return rwnd;
+}
+
+static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
+{
+	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+}
 #endif	/* _TCP_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 1468dbd0f09a..972ce4baab6b 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -302,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net,
 struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
 
+/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
+ * possibly multiple cache miss on dequeue()
+ */
+#if BITS_PER_LONG == 64
+
+/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on
+ * cold cache lines at recvmsg time.
+ * skb->len can be stored on 16 bits since the udp header has been already
+ * validated and pulled.
+ */
+struct udp_dev_scratch {
+	u32 truesize;
+	u16 len;
+	bool is_linear;
+	bool csum_unnecessary;
+};
+
+static inline unsigned int udp_skb_len(struct sk_buff *skb)
+{
+	return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
+}
+
+static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+	return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
+}
+
+static inline bool udp_skb_is_linear(struct sk_buff *skb)
+{
+	return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
+}
+
+#else
+static inline unsigned int udp_skb_len(struct sk_buff *skb)
+{
+	return skb->len;
+}
+
+static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+	return skb_csum_unnecessary(skb);
+}
+
+static inline bool udp_skb_is_linear(struct sk_buff *skb)
+{
+	return !skb_is_nonlinear(skb);
+}
+#endif
+
+static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
+				  struct iov_iter *to)
+{
+	int n, copy = len - off;
+
+	n = copy_to_iter(skb->data + off, copy, to);
+	if (n == copy)
+		return 0;
+
+	return -EFAULT;
+}
+
 /*
  * 	SNMP statistics for UDP and UDP-Lite
  */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index b816a0a6686e..326e8498b10e 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -221,9 +221,17 @@ struct vxlan_config {
 	bool			no_share;
 };
 
+struct vxlan_dev_node {
+	struct hlist_node hlist;
+	struct vxlan_dev *vxlan;
+};
+
 /* Pseudo network device */
 struct vxlan_dev {
-	struct hlist_node hlist;	/* vni hash table */
+	struct vxlan_dev_node hlist4;	/* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+	struct vxlan_dev_node hlist6;	/* vni hash table for IPv6 socket */
+#endif
 	struct list_head  next;		/* vxlan's per namespace list */
 	struct vxlan_sock __rcu *vn4_sock;	/* listening socket for IPv4 */
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 01f5bc144ee5..01fa357e9a32 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1854,8 +1854,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 }
 #endif
 
-#ifdef CONFIG_XFRM_OFFLOAD
 void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
@@ -1881,10 +1882,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	}
 }
 #else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
 static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	return 0;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f94b48b168dc..e99e3e6f8b37 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
+	BPF_PROG_TYPE_SOCK_OPS,
 };
 
 enum bpf_attach_type {
 	BPF_CGROUP_INET_INGRESS,
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
+	BPF_CGROUP_SOCK_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -518,6 +520,25 @@ union bpf_attr {
  *     Set full skb->hash.
  *     @skb: pointer to skb
  *     @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ *     Calls setsockopt. Not all opts are available, only those with
+ *     integer optvals plus TCP_CONGESTION.
+ *     Supported levels: SOL_SOCKET and IPROTO_TCP
+ *     @bpf_socket: pointer to bpf_socket
+ *     @level: SOL_SOCKET or IPROTO_TCP
+ *     @optname: option name
+ *     @optval: pointer to option value
+ *     @optlen: length of optval in byes
+ *     Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ *     Grow or shrink room in sk_buff.
+ *     @skb: pointer to skb
+ *     @len_diff: (signed) amount of room to grow/shrink
+ *     @mode: operation mode (enum bpf_adj_room_mode)
+ *     @flags: reserved for future use
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -568,7 +589,9 @@ union bpf_attr {
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
 	FN(get_socket_uid),		\
-	FN(set_hash),
+	FN(set_hash),			\
+	FN(setsockopt),			\
+	FN(skb_adjust_room),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -618,6 +641,11 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+	BPF_ADJ_ROOM_NET,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -720,4 +748,56 @@ struct bpf_map_info {
 	__u32 map_flags;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of this fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+	__u32 op;
+	union {
+		__u32 reply;
+		__u32 replylong[4];
+	};
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+	BPF_SOCK_OPS_VOID,
+	BPF_SOCK_OPS_TIMEOUT_INIT,	/* Should return SYN-RTO value to use or
+					 * -1 if default value should be used
+					 */
+	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertized
+					 * window (in packets) or -1 if default
+					 * value should be used
+					 */
+	BPF_SOCK_OPS_TCP_CONNECT_CB,	/* Calls BPF program right before an
+					 * active connection is initialized
+					 */
+	BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB,	/* Calls BPF program when an
+						 * active connection is
+						 * established
+						 */
+	BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,	/* Calls BPF program when a
+						 * passive connection is
+						 * established
+						 */
+	BPF_SOCK_OPS_NEEDS_ECN,		/* If connection's congestion control
+					 * needs ECN
+					 */
+};
+
+#define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index ced9d8b97426..6217ff8500a1 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -121,6 +121,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_RESET_STREAMS	119
 #define SCTP_RESET_ASSOC	120
 #define SCTP_ADD_STREAMS	121
+#define SCTP_SOCKOPT_PEELOFF_FLAGS 122
 
 /* PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE	0x0000
@@ -978,6 +979,11 @@ typedef struct {
 	int sd;
 } sctp_peeloff_arg_t;
 
+typedef struct {
+	sctp_peeloff_arg_t p_arg;
+	unsigned flags;
+} sctp_peeloff_flags_arg_t;
+
 /*
  *  Peer Address Thresholds socket option
  */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ecb43542246e..d771a3872500 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -335,6 +335,26 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
 }
 
 /* only called from syscall */
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+	void **elem, *ptr;
+	int ret =  0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	elem = array_map_lookup_elem(map, key);
+	if (elem && (ptr = READ_ONCE(*elem)))
+		*value = map->ops->map_fd_sys_lookup_elem(ptr);
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* only called from syscall */
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags)
 {
@@ -400,6 +420,11 @@ static void prog_fd_array_put_ptr(void *ptr)
 	bpf_prog_put(ptr);
 }
 
+static u32 prog_fd_array_sys_lookup_elem(void *ptr)
+{
+	return ((struct bpf_prog *)ptr)->aux->id;
+}
+
 /* decrement refcnt of all bpf_progs that are stored in this map */
 void bpf_fd_array_map_clear(struct bpf_map *map)
 {
@@ -418,6 +443,7 @@ const struct bpf_map_ops prog_array_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = prog_fd_array_get_ptr,
 	.map_fd_put_ptr = prog_fd_array_put_ptr,
+	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
 };
 
 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -585,4 +611,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = bpf_map_fd_get_ptr,
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 };
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ea6033cba947..546113430049 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
+
+/**
+ * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
+ * @sk: socket to get cgroup from
+ * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
+ * sk with connection information (IP addresses, etc.) May not contain
+ * cgroup info if it is a req sock.
+ * @type: The type of program to be exectuted
+ *
+ * socket passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock_ops
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+				     struct bpf_sock_ops_kern *sock_ops,
+				     enum bpf_attach_type type)
+{
+	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	struct bpf_prog *prog;
+	int ret = 0;
+
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog)
+		ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
+
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 774069ca18a7..ad5f55922a13 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1297,7 +1297,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
-	fp->bpf_func = interpreters[round_down(fp->aux->stack_depth, 32) / 32];
+	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 004334ea13ba..4fb463172aa8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1244,6 +1244,26 @@ static void fd_htab_map_free(struct bpf_map *map)
 }
 
 /* only called from syscall */
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+	void **ptr;
+	int ret = 0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	ptr = htab_map_lookup_elem(map, key);
+	if (ptr)
+		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* only called from syscall */
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags)
 {
@@ -1305,4 +1325,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_fd_get_ptr = bpf_map_fd_get_ptr,
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 };
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 59bcdf821ae4..1da574612bea 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -95,3 +95,8 @@ void bpf_map_fd_put_ptr(void *ptr)
 	 */
 	bpf_map_put(ptr);
 }
+
+u32 bpf_map_fd_sys_lookup_elem(void *ptr)
+{
+	return ((struct bpf_map *)ptr)->id;
+}
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index 177fadb689dc..6183db9ec08c 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -19,5 +19,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
 			 int ufd);
 void bpf_map_fd_put_ptr(void *ptr);
+u32 bpf_map_fd_sys_lookup_elem(void *ptr);
 
 #endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8942c820d620..18980472f5b0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -24,6 +24,13 @@
 #include <linux/kernel.h>
 #include <linux/idr.h>
 
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
+			   (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+			   (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+			   (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+
 DEFINE_PER_CPU(int, bpf_prog_active);
 static DEFINE_IDR(prog_idr);
 static DEFINE_SPINLOCK(prog_idr_lock);
@@ -209,10 +216,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 	const struct bpf_map *map = filp->private_data;
 	const struct bpf_array *array;
 	u32 owner_prog_type = 0;
+	u32 owner_jited = 0;
 
 	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 		array = container_of(map, struct bpf_array, map);
 		owner_prog_type = array->owner_prog_type;
+		owner_jited = array->owner_jited;
 	}
 
 	seq_printf(m,
@@ -229,9 +238,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   map->map_flags,
 		   map->pages * 1ULL << PAGE_SHIFT);
 
-	if (owner_prog_type)
+	if (owner_prog_type) {
 		seq_printf(m, "owner_prog_type:\t%u\n",
 			   owner_prog_type);
+		seq_printf(m, "owner_jited:\t%u\n",
+			   owner_jited);
+	}
 }
 #endif
 
@@ -411,6 +423,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
+	else if (IS_FD_MAP(map))
+		value_size = sizeof(u32);
 	else
 		value_size = map->value_size;
 
@@ -426,9 +440,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_percpu_array_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		err = bpf_stackmap_copy(map, key, value);
-	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
-		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-		err = -ENOTSUPP;
+	} else if (IS_FD_ARRAY(map)) {
+		err = bpf_fd_array_map_lookup_elem(map, key, value);
+	} else if (IS_FD_HASH(map)) {
+		err = bpf_fd_htab_map_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -1069,6 +1084,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_SOCK_CREATE:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 		break;
+	case BPF_CGROUP_SOCK_OPS:
+		ptype = BPF_PROG_TYPE_SOCK_OPS;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1109,6 +1127,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_SOCK_OPS:
 		cgrp = cgroup_get_from_fd(attr->target_fd);
 		if (IS_ERR(cgrp))
 			return PTR_ERR(cgrp);
@@ -1123,6 +1142,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 
 	return ret;
 }
+
 #endif /* CONFIG_CGROUP_BPF */
 
 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 74ea96ea391b..6a86723c5b64 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 	return 0;
 }
 
-static int bpf_size_to_bytes(int bpf_size)
-{
-	if (bpf_size == BPF_W)
-		return 4;
-	else if (bpf_size == BPF_H)
-		return 2;
-	else if (bpf_size == BPF_B)
-		return 1;
-	else if (bpf_size == BPF_DW)
-		return 8;
-	else
-		return -EINVAL;
-}
-
 static bool is_spillable_regtype(enum bpf_reg_type type)
 {
 	switch (type) {
@@ -761,7 +747,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
-	struct bpf_insn_access_aux info = { .reg_type = *reg_type };
+	struct bpf_insn_access_aux info = {
+		.reg_type = *reg_type,
+	};
 
 	/* for analyzer ctx accesses are already validated and converted */
 	if (env->analyzer_ops)
@@ -769,25 +757,14 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 
 	if (env->prog->aux->ops->is_valid_access &&
 	    env->prog->aux->ops->is_valid_access(off, size, t, &info)) {
-		/* a non zero info.ctx_field_size indicates:
-		 * . For this field, the prog type specific ctx conversion algorithm
-		 *   only supports whole field access.
-		 * . This ctx access is a candiate for later verifier transformation
-		 *   to load the whole field and then apply a mask to get correct result.
-		 * a non zero info.converted_op_size indicates perceived actual converted
-		 * value width in convert_ctx_access.
+		/* A non zero info.ctx_field_size indicates that this field is a
+		 * candidate for later verifier transformation to load the whole
+		 * field and then apply a mask when accessed with a narrower
+		 * access than actual ctx access size. A zero info.ctx_field_size
+		 * will only allow for whole field access and rejects any other
+		 * type of narrower access.
 		 */
-		if ((info.ctx_field_size && !info.converted_op_size) ||
-		    (!info.ctx_field_size &&  info.converted_op_size)) {
-			verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n",
-				env->prog->type, off, size);
-			return -EACCES;
-		}
-
-		if (info.ctx_field_size) {
-			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
-			env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size;
-		}
+		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 		*reg_type = info.reg_type;
 
 		/* remember the offset of last byte accessed in ctx */
@@ -1016,6 +993,11 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 	if (err)
 		return err;
 
+	if (is_pointer_value(env, insn->src_reg)) {
+		verbose("R%d leaks addr into mem\n", insn->src_reg);
+		return -EACCES;
+	}
+
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1675,6 +1657,65 @@ static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env,
+					struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+	u8 opcode = BPF_OP(insn->code);
+	s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm);
+
+	/* BPF_X code with src_reg->type UNKNOWN_VALUE here. */
+	if (src_reg->imm > 0 && dst_reg->imm) {
+		switch (opcode) {
+		case BPF_ADD:
+			/* dreg += sreg
+			 * where both have zero upper bits. Adding them
+			 * can only result making one more bit non-zero
+			 * in the larger value.
+			 * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
+			 *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
+			 */
+			dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+			dst_reg->imm--;
+			break;
+		case BPF_AND:
+			/* dreg &= sreg
+			 * AND can not extend zero bits only shrink
+			 * Ex.  0x00..00ffffff
+			 *    & 0x0f..ffffffff
+			 *     ----------------
+			 *      0x00..00ffffff
+			 */
+			dst_reg->imm = max(src_reg->imm, 63 - imm_log2);
+			break;
+		case BPF_OR:
+			/* dreg |= sreg
+			 * OR can only extend zero bits
+			 * Ex.  0x00..00ffffff
+			 *    | 0x0f..ffffffff
+			 *     ----------------
+			 *      0x0f..00ffffff
+			 */
+			dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+			break;
+		case BPF_SUB:
+		case BPF_MUL:
+		case BPF_RSH:
+		case BPF_LSH:
+			/* These may be flushed out later */
+		default:
+			mark_reg_unknown_value(regs, insn->dst_reg);
+		}
+	} else {
+		mark_reg_unknown_value(regs, insn->dst_reg);
+	}
+
+	dst_reg->type = UNKNOWN_VALUE;
+	return 0;
+}
+
 static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
 				struct bpf_insn *insn)
 {
@@ -1684,6 +1725,9 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
 	u8 opcode = BPF_OP(insn->code);
 	u64 dst_imm = dst_reg->imm;
 
+	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE)
+		return evaluate_reg_imm_alu_unknown(env, insn);
+
 	/* dst_reg->type == CONST_IMM here. Simulate execution of insns
 	 * containing ALU ops. Don't care about overflow or negative
 	 * values, just add/sub/... them; registers are in u64.
@@ -3396,11 +3440,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
 	const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+	int i, cnt, size, ctx_field_size, delta = 0;
 	const int insn_cnt = env->prog->len;
 	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0;
+	bool is_narrower_load;
+	u32 target_size;
 
 	if (ops->gen_prologue) {
 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -3440,39 +3486,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
 			continue;
 
-		off = insn->off;
-		size = bpf_size_to_bytes(BPF_SIZE(insn->code));
 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
-		converted_op_size = env->insn_aux_data[i + delta].converted_op_size;
-		is_narrower_load = type == BPF_READ && size < ctx_field_size;
+		size = BPF_LDST_BYTES(insn);
 
 		/* If the read access is a narrower load of the field,
 		 * convert to a 4/8-byte load, to minimum program type specific
 		 * convert_ctx_access changes. If conversion is successful,
 		 * we will apply proper mask to the result.
 		 */
+		is_narrower_load = size < ctx_field_size;
 		if (is_narrower_load) {
-			int size_code = BPF_H;
+			u32 off = insn->off;
+			u8 size_code;
 
+			if (type == BPF_WRITE) {
+				verbose("bpf verifier narrow ctx access misconfigured\n");
+				return -EINVAL;
+			}
+
+			size_code = BPF_H;
 			if (ctx_field_size == 4)
 				size_code = BPF_W;
 			else if (ctx_field_size == 8)
 				size_code = BPF_DW;
+
 			insn->off = off & ~(ctx_field_size - 1);
 			insn->code = BPF_LDX | BPF_MEM | size_code;
 		}
-		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
-		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+
+		target_size = 0;
+		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
+					      &target_size);
+		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
+		    (ctx_field_size && !target_size)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
 		}
-		if (is_narrower_load && size < converted_op_size) {
+
+		if (is_narrower_load && size < target_size) {
 			if (ctx_field_size <= 4)
 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
-							(1 << size * 8) - 1);
+								(1 << size * 8) - 1);
 			else
 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
-							(1 << size * 8) - 1);
+								(1 << size * 8) - 1);
 		}
 
 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2831480c63a2..ee97196bb151 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 	int ret = -ENOMEM, max_order = 0;
 
 	if (!has_aux(event))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
 		/*
diff --git a/kernel/signal.c b/kernel/signal.c
index ca92bcfeb322..45b4c1ffe14e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
 	return !tsk->ptrace;
 }
 
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+			   bool *resched_timer)
 {
 	struct sigqueue *q, *first = NULL;
 
@@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 still_pending:
 		list_del_init(&first->list);
 		copy_siginfo(info, &first->info);
+
+		*resched_timer =
+			(first->flags & SIGQUEUE_PREALLOC) &&
+			(info->si_code == SI_TIMER) &&
+			(info->si_sys_private);
+
 		__sigqueue_free(first);
 	} else {
 		/*
@@ -548,12 +555,12 @@ still_pending:
 }
 
 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-			siginfo_t *info)
+			siginfo_t *info, bool *resched_timer)
 {
 	int sig = next_signal(pending, mask);
 
 	if (sig)
-		collect_signal(sig, pending, info);
+		collect_signal(sig, pending, info, resched_timer);
 	return sig;
 }
 
@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
  */
 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 {
+	bool resched_timer = false;
 	int signr;
 
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-	signr = __dequeue_signal(&tsk->pending, mask, info);
+	signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
 	if (!signr) {
 		signr = __dequeue_signal(&tsk->signal->shared_pending,
-					 mask, info);
+					 mask, info, &resched_timer);
 #ifdef CONFIG_POSIX_TIMERS
 		/*
 		 * itimer signal ?
@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		current->jobctl |= JOBCTL_STOP_DEQUEUED;
 	}
 #ifdef CONFIG_POSIX_TIMERS
-	if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+	if (resched_timer) {
 		/*
 		 * Release the siglock to ensure proper locking order
 		 * of timer locks outside of siglocks.  Note, we leave
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc57fd09..b602c48cb841 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		now = tk_clock_read(tkr);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	u64 cycle_now, delta;
 
 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = tk_clock_read(tkr);
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	/* Go back from cycles -> shifted ns */
 	tk->xtime_interval = interval * clock->mult;
 	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-	tk->raw_interval = (interval * clock->mult) >> clock->shift;
+	tk->raw_interval = interval * clock->mult;
 
 	 /* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
 	return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = tk_clock_read(tkr);
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr_mono.clock;
 	u64 cycle_now, delta;
 	u64 nsec;
 
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 		 * Check whether the system counter value provided by the
 		 * device driver is on the current timekeeping interval.
 		 */
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		interval_start = tk->tkr_mono.cycle_last;
 		if (!cycle_between(interval_start, cycles, now)) {
 			clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 				    u32 shift, unsigned int *clock_set)
 {
 	u64 interval = tk->cycle_interval << shift;
-	u64 raw_nsecs;
+	u64 snsec_per_sec;
 
 	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
-	raw_nsecs = (u64)tk->raw_interval << shift;
-	raw_nsecs += tk->raw_time.tv_nsec;
-	if (raw_nsecs >= NSEC_PER_SEC) {
-		u64 raw_secs = raw_nsecs;
-		raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-		tk->raw_time.tv_sec += raw_secs;
+	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+		tk->raw_time.tv_sec++;
 	}
-	tk->raw_time.tv_nsec = raw_nsecs;
+	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 97c46b440cd6..37385193a608 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 }
 
 /*
- * limited trace_printk()
- * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
+ * Only limited trace_printk() conversion specifiers allowed:
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
  */
 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	   u64, arg2, u64, arg3)
@@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 			i++;
 		}
 
-		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+		if (fmt[i] != 'i' && fmt[i] != 'd' &&
+		    fmt[i] != 'u' && fmt[i] != 'x')
 			return -EINVAL;
 		fmt_cnt++;
 	}
@@ -583,7 +584,8 @@ const struct bpf_verifier_ops tracepoint_prog_ops = {
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    struct bpf_insn_access_aux *info)
 {
-	int sample_period_off;
+	const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
+					 sample_period);
 
 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
 		return false;
@@ -592,43 +594,35 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 	if (off % size != 0)
 		return false;
 
-	/* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */
-	sample_period_off = offsetof(struct bpf_perf_event_data, sample_period);
-	if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) {
-		int allowed;
-
-#ifdef __LITTLE_ENDIAN
-		allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
-#else
-		allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
-#endif
-		if (!allowed)
+	switch (off) {
+	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
+		bpf_ctx_record_field_size(info, size_sp);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
 			return false;
-		info->ctx_field_size = 8;
-		info->converted_op_size = 8;
-	} else {
+		break;
+	default:
 		if (size != sizeof(long))
 			return false;
 	}
+
 	return true;
 }
 
 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
 				      const struct bpf_insn *si,
 				      struct bpf_insn *insn_buf,
-				      struct bpf_prog *prog)
+				      struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
 	switch (si->off) {
 	case offsetof(struct bpf_perf_event_data, sample_period):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
-
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
 						       data), si->dst_reg, si->src_reg,
 				      offsetof(struct bpf_perf_event_data_kern, data));
 		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
-				      offsetof(struct perf_sample_data, period));
+				      bpf_target_off(struct perf_sample_data, period, 8,
+						     target_size));
 		break;
 	default:
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 3c6432df7e63..4c0888c4a68d 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -23,14 +23,14 @@
  *	the values[M, M+1, ..., N] into the ints array in get_options.
  */
 
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
 {
 	int x, inc_counter, upper_range;
 
 	(*str)++;
 	upper_range = simple_strtol((*str), NULL, 0);
 	inc_counter = upper_range - *pint;
-	for (x = *pint; x < upper_range; x++)
+	for (x = *pint; n && x < upper_range; x++, n--)
 		*pint++ = x;
 	return inc_counter;
 }
@@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints)
 			break;
 		if (res == 3) {
 			int range_nums;
-			range_nums = get_range((char **)&str, ints + i);
+			range_nums = get_range((char **)&str, ints + i, nints - i);
 			if (range_nums < 0)
 				break;
 			/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 945fd1ca49b5..df4ebdb2b10a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
 		}
-		cond_resched();
 	}
 }
 
diff --git a/mm/slub.c b/mm/slub.c
index 7449593fca72..8addc535bcdc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
 	return name;
 }
 
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+	struct kmem_cache *s =
+		container_of(work, struct kmem_cache, kobj_remove_work);
+
+	if (!s->kobj.state_in_sysfs)
+		/*
+		 * For a memcg cache, this may be called during
+		 * deactivation and again on shutdown.  Remove only once.
+		 * A cache is never shut down before deactivation is
+		 * complete, so no need to worry about synchronization.
+		 */
+		return;
+
+#ifdef CONFIG_MEMCG
+	kset_unregister(s->memcg_kset);
+#endif
+	kobject_uevent(&s->kobj, KOBJ_REMOVE);
+	kobject_del(&s->kobj);
+	kobject_put(&s->kobj);
+}
+
 static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
@@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	struct kset *kset = cache_kset(s);
 	int unmergeable = slab_unmergeable(s);
 
+	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
 	if (!kset) {
 		kobject_init(&s->kobj, &slab_ktype);
 		return 0;
@@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
 		 */
 		return;
 
-	if (!s->kobj.state_in_sysfs)
-		/*
-		 * For a memcg cache, this may be called during
-		 * deactivation and again on shutdown.  Remove only once.
-		 * A cache is never shut down before deactivation is
-		 * complete, so no need to worry about synchronization.
-		 */
-		return;
-
-#ifdef CONFIG_MEMCG
-	kset_unregister(s->memcg_kset);
-#endif
-	kobject_uevent(&s->kobj, KOBJ_REMOVE);
-	kobject_del(&s->kobj);
+	kobject_get(&s->kobj);
+	schedule_work(&s->kobj_remove_work);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 34a1c3e46ed7..ecc97f74ab18 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	if (p4d_none(*p4d))
 		return NULL;
 	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud))
+
+	/*
+	 * Don't dereference bad PUD or PMD (below) entries. This will also
+	 * identify huge mappings, which we may encounter on architectures
+	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
+	 * not [unambiguously] associated with a struct page, so there is
+	 * no correct value to return for them.
+	 */
+	WARN_ON_ONCE(pud_bad(*pud));
+	if (pud_none(*pud) || pud_bad(*pud))
 		return NULL;
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	WARN_ON_ONCE(pmd_bad(*pmd));
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
 		return NULL;
 
 	ptep = pte_offset_map(pmd, addr);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fca84e111c89..4e111196f902 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 
 	ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index a7e4018370b4..f271a7bcf5b2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -137,11 +137,11 @@ static int neigh_check_cb(struct neighbour *n)
 	if (entry->vccs || time_before(jiffies, entry->expires))
 		return 0;
 
-	if (atomic_read(&n->refcnt) > 1) {
+	if (refcount_read(&n->refcnt) > 1) {
 		struct sk_buff *skb;
 
 		pr_debug("destruction postponed with ref %d\n",
-			 atomic_read(&n->refcnt));
+			 refcount_read(&n->refcnt));
 
 		while ((skb = skb_dequeue(&n->arp_queue)) != NULL)
 			dev_kfree_skb(skb);
@@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 		memcpy(here, llc_oui, sizeof(llc_oui));
 		((__be16 *) here)[3] = skb->protocol;
 	}
-	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
 	entry->vccs->last_use = jiffies;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
@@ -767,7 +767,7 @@ static void atmarp_info(struct seq_file *seq, struct neighbour *n,
 			seq_printf(seq, "(resolving)\n");
 		else
 			seq_printf(seq, "(expired, ref %d)\n",
-				   atomic_read(&entry->neigh->refcnt));
+				   refcount_read(&entry->neigh->refcnt));
 	} else if (!svc) {
 		seq_printf(seq, "%d.%d.%d\n",
 			   clip_vcc->vcc->dev->number,
diff --git a/net/atm/common.c b/net/atm/common.c
index f06422f4108d..8a4f99114cd2 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk)
 		printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n",
 		       __func__, atomic_read(&sk->sk_rmem_alloc));
 
-	if (atomic_read(&sk->sk_wmem_alloc))
+	if (refcount_read(&sk->sk_wmem_alloc))
 		printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n",
-		       __func__, atomic_read(&sk->sk_wmem_alloc));
+		       __func__, refcount_read(&sk->sk_wmem_alloc));
 }
 
 static void vcc_def_wakeup(struct sock *sk)
@@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk)
 	struct atm_vcc *vcc = atm_sk(sk);
 
 	return (vcc->qos.txtp.max_sdu +
-		atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
+		refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
 }
 
 static void vcc_write_space(struct sock *sk)
@@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i
 	memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc));
 	memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc));
 	vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
-	atomic_set(&sk->sk_wmem_alloc, 1);
+	refcount_set(&sk->sk_wmem_alloc, 1);
 	atomic_set(&sk->sk_rmem_alloc, 0);
 	vcc->push = NULL;
 	vcc->pop = NULL;
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
 		goto out;
 	}
 	pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	skb->dev = NULL; /* for paths shared with net_device interfaces */
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09cfe87f0a44..75545717fa46 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	ATM_SKB(skb)->vcc = vcc;
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
 
-	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 	if (vcc->send(vcc, skb) < 0) {
 		dev->stats.tx_dropped++;
 		return;
@@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	int i;
 	char *tmp;		/* FIXME */
 
-	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 	mesg = (struct atmlec_msg *)skb->data;
 	tmp = skb->data;
 	tmp += sizeof(struct atmlec_msg);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index a190800572bd..680a4b9095a1 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
 					sizeof(struct llc_snap_hdr));
 	}
 
-	atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
 	entry->shortcut->send(entry->shortcut, skb);
 	entry->packets_fwded++;
@@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
 	struct k_message *mesg = (struct k_message *)skb->data;
-	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 
 	if (mpc == NULL) {
 		pr_info("no mpc found\n");
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index c4e09846d1de..21d9d341a619 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
 		return 1;
 	}
 
-	atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
 	pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
 		 skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
diff --git a/net/atm/proc.c b/net/atm/proc.c
index bbb6461a4b7f..27c9c01c537d 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -211,7 +211,7 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc)
 		   vcc->flags, sk->sk_err,
 		   sk_wmem_alloc_get(sk), sk->sk_sndbuf,
 		   sk_rmem_alloc_get(sk), sk->sk_rcvbuf,
-		   atomic_read(&sk->sk_refcnt));
+		   refcount_read(&sk->sk_refcnt));
 }
 
 static void svc_info(struct seq_file *seq, struct atm_vcc *vcc)
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2e17e97a7a8b..821c0797553d 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	pr_debug("(%d) %d -= %d\n",
 		 vcc->vci, sk_wmem_alloc_get(sk), skb->truesize);
-	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
 	dev_kfree_skb_any(skb);
 	sk->sk_write_space(sk);
 }
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index f640a99e14b8..983c3a21a133 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	struct sock *sk;
 
 	msg = (struct atmsvc_msg *) skb->data;
-	atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
 	vcc = *(struct atm_vcc **) &msg->vcc;
 	pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc);
 	sk = sk_atm(vcc);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 8a8f77a247e6..91e3ba280706 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq,
 			   "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu",
 			   sk,
-			   atomic_read(&sk->sk_refcnt),
+			   refcount_read(&sk->sk_refcnt),
 			   sk_rmem_alloc_get(sk),
 			   sk_wmem_alloc_get(sk),
 			   from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 9a40013da915..7b3965861013 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -481,16 +481,16 @@ static int bnep_session(void *arg)
 	struct net_device *dev = s->dev;
 	struct sock *sk = s->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	BT_DBG("");
 
 	set_user_nice(current, -15);
 
-	init_waitqueue_entry(&wait, current);
 	add_wait_queue(sk_sleep(sk), &wait);
 	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
+		/* Ensure session->terminate is updated */
+		smp_mb__before_atomic();
 
 		if (atomic_read(&s->terminate))
 			break;
@@ -512,9 +512,8 @@ static int bnep_session(void *arg)
 				break;
 		netif_wake_queue(dev);
 
-		schedule();
+		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
-	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk_sleep(sk), &wait);
 
 	/* Cleanup session */
@@ -663,7 +662,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
 	s = __bnep_get_session(req->dst);
 	if (s) {
 		atomic_inc(&s->terminate);
-		wake_up_process(s->task);
+		wake_up_interruptible(sk_sleep(s->sock->sk));
 	} else
 		err = -ENOENT;
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index f4c64ef01c24..7f26a5a19ff6 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,16 +280,16 @@ static int cmtp_session(void *arg)
 	struct cmtp_session *session = arg;
 	struct sock *sk = session->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	BT_DBG("session %p", session);
 
 	set_user_nice(current, -15);
 
-	init_waitqueue_entry(&wait, current);
 	add_wait_queue(sk_sleep(sk), &wait);
 	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
+		/* Ensure session->terminate is updated */
+		smp_mb__before_atomic();
 
 		if (atomic_read(&session->terminate))
 			break;
@@ -306,9 +306,8 @@ static int cmtp_session(void *arg)
 
 		cmtp_process_transmit(session);
 
-		schedule();
+		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
-	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk_sleep(sk), &wait);
 
 	down_write(&cmtp_session_sem);
@@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 		err = cmtp_attach_device(session);
 		if (err < 0) {
 			atomic_inc(&session->terminate);
-			wake_up_process(session->task);
+			wake_up_interruptible(sk_sleep(session->sock->sk));
 			up_write(&cmtp_session_sem);
 			return err;
 		}
@@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
 
 		/* Stop session thread */
 		atomic_inc(&session->terminate);
-		wake_up_process(session->task);
+
+		/* Ensure session->terminate is updated */
+		smp_mb__after_atomic();
+
+		wake_up_interruptible(sk_sleep(session->sock->sk));
 	} else
 		err = -ENOENT;
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d860e3cc23cf..6bc679cd3481 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3096,15 +3096,14 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
 
-	hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
-					  WQ_MEM_RECLAIM, 1, hdev->name);
+	hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name);
 	if (!hdev->workqueue) {
 		error = -ENOMEM;
 		goto err;
 	}
 
-	hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
-					      WQ_MEM_RECLAIM, 1, hdev->name);
+	hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI,
+						      hdev->name);
 	if (!hdev->req_workqueue) {
 		destroy_workqueue(hdev->workqueue);
 		error = -ENOMEM;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 961f7f53e178..472b3907b1b0 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,7 @@
 #define VERSION "1.2"
 
 static DECLARE_RWSEM(hidp_session_sem);
+static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq);
 static LIST_HEAD(hidp_session_list);
 
 static unsigned char hidp_keycode[256] = {
@@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session)
  * Wake up session thread and notify it to stop. This is asynchronous and
  * returns immediately. Call this whenever a runtime error occurs and you want
  * the session to stop.
- * Note: wake_up_process() performs any necessary memory-barriers for us.
+ * Note: wake_up_interruptible() performs any necessary memory-barriers for us.
  */
 static void hidp_session_terminate(struct hidp_session *session)
 {
 	atomic_inc(&session->terminate);
-	wake_up_process(session->task);
+	wake_up_interruptible(&hidp_session_wq);
 }
 
 /*
@@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session)
 	struct sock *ctrl_sk = session->ctrl_sock->sk;
 	struct sock *intr_sk = session->intr_sock->sk;
 	struct sk_buff *skb;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
+	add_wait_queue(&hidp_session_wq, &wait);
 	for (;;) {
 		/*
 		 * This thread can be woken up two ways:
@@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session)
 		 *    session->terminate flag and wakes this thread up.
 		 *  - Via modifying the socket state of ctrl/intr_sock. This
 		 *    thread is woken up by ->sk_state_changed().
-		 *
-		 * Note: set_current_state() performs any necessary
-		 * memory-barriers for us.
 		 */
-		set_current_state(TASK_INTERRUPTIBLE);
 
+		/* Ensure session->terminate is updated */
+		smp_mb__before_atomic();
 		if (atomic_read(&session->terminate))
 			break;
 
@@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session)
 		hidp_process_transmit(session, &session->ctrl_transmit,
 				      session->ctrl_sock);
 
-		schedule();
+		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
+	remove_wait_queue(&hidp_session_wq, &wait);
 
 	atomic_inc(&session->terminate);
-	set_current_state(TASK_RUNNING);
+
+	/* Ensure session->terminate is updated */
+	smp_mb__after_atomic();
+}
+
+static int hidp_session_wake_function(wait_queue_t *wait,
+				      unsigned int mode,
+				      int sync, void *key)
+{
+	wake_up_interruptible(&hidp_session_wq);
+	return false;
 }
 
 /*
@@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session)
 static int hidp_session_thread(void *arg)
 {
 	struct hidp_session *session = arg;
-	wait_queue_t ctrl_wait, intr_wait;
+	DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function);
+	DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function);
 
 	BT_DBG("session %p", session);
 
@@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg)
 	set_user_nice(current, -15);
 	hidp_set_timer(session);
 
-	init_waitqueue_entry(&ctrl_wait, current);
-	init_waitqueue_entry(&intr_wait, current);
 	add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
 	add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
 	/* This memory barrier is paired with wq_has_sleeper(). See
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 507b80d59dec..67a8642f57ea 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -87,7 +87,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 
 	BT_DBG("sk %p", sk);
 
-	if (!addr || addr->sa_family != AF_BLUETOOTH)
+	if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
 	memset(&la, 0, sizeof(la));
@@ -181,7 +182,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
 
 	BT_DBG("sk %p", sk);
 
-	if (!addr || alen < sizeof(addr->sa_family) ||
+	if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
 	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ac3c650cb234..1aaccf637479 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -197,7 +197,7 @@ static void rfcomm_sock_kill(struct sock *sk)
 	if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
 		return;
 
-	BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+	BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
 
 	/* Kill poor orphan */
 	bt_sock_unlink(&rfcomm_sk_list, sk);
@@ -339,7 +339,8 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
 	struct sock *sk = sock->sk;
 	int len, err = 0;
 
-	if (!addr || addr->sa_family != AF_BLUETOOTH)
+	if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) ||
+	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
 	memset(&sa, 0, sizeof(sa));
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 728e0c8dc8e7..795e920a3281 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -524,10 +524,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
 
 	BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
 
-	if (!addr || addr->sa_family != AF_BLUETOOTH)
-		return -EINVAL;
-
-	if (addr_len < sizeof(struct sockaddr_sco))
+	if (!addr || addr_len < sizeof(struct sockaddr_sco) ||
+	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
 	lock_sock(sk);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 067cf0313449..2261e5194c82 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -149,12 +149,12 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 
-	if (atomic_read(&nf_bridge->use) > 1) {
+	if (refcount_read(&nf_bridge->use) > 1) {
 		struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
 
 		if (tmp) {
 			memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
-			atomic_set(&tmp->use, 1);
+			refcount_set(&tmp->use, 1);
 		}
 		nf_bridge_put(nf_bridge);
 		nf_bridge = tmp;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0b5dd607444c..723f25eed8ea 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -865,7 +865,7 @@ static struct attribute *bridge_attrs[] = {
 	NULL
 };
 
-static struct attribute_group bridge_group = {
+static const struct attribute_group bridge_group = {
 	.name = SYSFS_BRIDGE_ATTR,
 	.attrs = bridge_attrs,
 };
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index e0bb624c3845..dfc86a0199da 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -61,7 +61,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 	    (strcmp(par->table, "broute") != 0 ||
 	    hook_mask & ~(1 << NF_BR_BROUTING)))
 		return -EINVAL;
-	if (INVALID_TARGET)
+	if (ebt_invalid_target(info->target))
 		return -EINVAL;
 	return 0;
 }
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 66697cbd0a8b..19f0f9592d32 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -44,7 +44,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
-	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+	if (ebt_invalid_target(tmp))
 		return -EINVAL;
 	tmp = info->target & ~EBT_VERDICT_BITS;
 	if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 8d2a85e0594e..a7223eaf490b 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -47,7 +47,7 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
 	    (strcmp(par->table, "broute") != 0 ||
 	    hook_mask & ~(1 << NF_BR_BROUTING)))
 		return -EINVAL;
-	if (INVALID_TARGET)
+	if (ebt_invalid_target(info->target))
 		return -EINVAL;
 	return 0;
 }
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index e56ccd060d26..11cf9e9e9222 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -51,7 +51,7 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
 	if (BASE_CHAIN && tmp == EBT_RETURN)
 		return -EINVAL;
 
-	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+	if (ebt_invalid_target(tmp))
 		return -EINVAL;
 	tmp = info->target | EBT_VERDICT_BITS;
 	if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 7506b853a84d..632d5a416d97 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = {
 static void caif_sock_destructor(struct sock *sk)
 {
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	caif_assert(!atomic_read(&sk->sk_wmem_alloc));
+	caif_assert(!refcount_read(&sk->sk_wmem_alloc));
 	caif_assert(sk_unhashed(sk));
 	caif_assert(!sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e5311a7c70da..454ec8923333 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 				}
 			}
 			*peeked = 1;
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 		} else {
 			__skb_unlink(skb, queue);
 			if (destructor)
@@ -358,7 +358,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
 		spin_lock_bh(&sk_queue->lock);
 		if (skb == skb_peek(sk_queue)) {
 			__skb_unlink(skb, sk_queue);
-			atomic_dec(&skb->users);
+			refcount_dec(&skb->users);
 			if (destructor)
 				destructor(sk, skb);
 			err = 0;
@@ -614,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 		skb->data_len += copied;
 		skb->len += copied;
 		skb->truesize += truesize;
-		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (copied) {
 			int size = min_t(int, copied, PAGE_SIZE - start);
 			skb_fill_page_desc(skb, frag++, pages[n], start, size);
diff --git a/net/core/dev.c b/net/core/dev.c
index a91572aa73d5..7098fba52be1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1862,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb,
 {
 	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 		return -ENOMEM;
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
@@ -2484,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
 	if (unlikely(!skb))
 		return;
 
-	if (likely(atomic_read(&skb->users) == 1)) {
+	if (likely(refcount_read(&skb->users) == 1)) {
 		smp_rmb();
-		atomic_set(&skb->users, 0);
-	} else if (likely(!atomic_dec_and_test(&skb->users))) {
+		refcount_set(&skb->users, 0);
+	} else if (likely(!refcount_dec_and_test(&skb->users))) {
 		return;
 	}
 	get_kfree_skb_cb(skb)->reason = reason;
@@ -3955,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 
 			clist = clist->next;
 
-			WARN_ON(atomic_read(&skb->users));
+			WARN_ON(refcount_read(&skb->users));
 			if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
 				trace_consume_skb(skb);
 			else
@@ -4844,6 +4844,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
 }
 EXPORT_SYMBOL(gro_find_complete_by_type);
 
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+	skb_dst_drop(skb);
+	secpath_reset(skb);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
@@ -4857,13 +4864,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 		break;
 
 	case GRO_MERGED_FREE:
-		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
-			skb_dst_drop(skb);
-			secpath_reset(skb);
-			kmem_cache_free(skbuff_head_cache, skb);
-		} else {
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+			napi_skb_free_stolen_head(skb);
+		else
 			__kfree_skb(skb);
-		}
 		break;
 
 	case GRO_HELD:
@@ -4935,10 +4939,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
 		break;
 
 	case GRO_DROP:
-	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
 
+	case GRO_MERGED_FREE:
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+			napi_skb_free_stolen_head(skb);
+		else
+			napi_reuse_skb(napi, skb);
+		break;
+
 	case GRO_MERGED:
 	case GRO_CONSUMED:
 		break;
@@ -7825,7 +7835,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 {
 #if BITS_PER_LONG == 64
 	BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
-	memcpy(stats64, netdev_stats, sizeof(*stats64));
+	memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
 	/* zero out counters that only exist in rtnl_link_stats64 */
 	memset((char *)stats64 + sizeof(*netdev_stats), 0,
 	       sizeof(*stats64) - sizeof(*netdev_stats));
@@ -7867,9 +7877,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 	} else {
 		netdev_stats_to_stats64(storage, &dev->stats);
 	}
-	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
-	storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
-	storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+	storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+	storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+	storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3bba291c6c32..a0093e1b0235 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -46,7 +46,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	if (r == NULL)
 		return -ENOMEM;
 
-	atomic_set(&r->refcnt, 1);
+	refcount_set(&r->refcnt, 1);
 	r->action = FR_ACT_TO_TBL;
 	r->pref = pref;
 	r->table = table;
@@ -283,7 +283,7 @@ jumped:
 
 		if (err != -EAGAIN) {
 			if ((arg->flags & FIB_LOOKUP_NOREF) ||
-			    likely(atomic_inc_not_zero(&rule->refcnt))) {
+			    likely(refcount_inc_not_zero(&rule->refcnt))) {
 				arg->rule = rule;
 				goto out;
 			}
@@ -517,7 +517,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		last = r;
 	}
 
-	fib_rule_get(rule);
+	refcount_set(&rule->refcnt, 1);
 
 	if (last)
 		list_add_rcu(&rule->list, &last->list);
diff --git a/net/core/filter.c b/net/core/filter.c
index b39c869d22e3..94169572d002 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -54,6 +54,7 @@
 #include <net/dst.h>
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
+#include <net/tcp.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -2011,7 +2012,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
 static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
-	u32 off = skb->network_header - skb->mac_header;
+	u32 off = skb_mac_header_len(skb);
 	int ret;
 
 	ret = skb_cow(skb, len_diff);
@@ -2047,7 +2048,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
-	u32 off = skb->network_header - skb->mac_header;
+	u32 off = skb_mac_header_len(skb);
 	int ret;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2153,6 +2154,124 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		return sizeof(struct iphdr);
+	case htons(ETH_P_IPV6):
+		return sizeof(struct ipv6hdr);
+	default:
+		return ~0U;
+	}
+}
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+{
+	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	int ret;
+
+	ret = skb_cow(skb, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (skb_is_gso(skb)) {
+		/* Due to header grow, MSS needs to be downgraded. */
+		skb_shinfo(skb)->gso_size -= len_diff;
+		/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	return 0;
+}
+
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+{
+	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (skb_is_gso(skb)) {
+		/* Due to header shrink, MSS can be upgraded. */
+		skb_shinfo(skb)->gso_size += len_diff;
+		/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	return 0;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+	return skb->dev->mtu + skb->dev->hard_header_len;
+}
+
+static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+{
+	bool trans_same = skb->transport_header == skb->network_header;
+	u32 len_cur, len_diff_abs = abs(len_diff);
+	u32 len_min = bpf_skb_net_base_len(skb);
+	u32 len_max = __bpf_skb_max_len(skb);
+	__be16 proto = skb->protocol;
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+	if (unlikely(proto != htons(ETH_P_IP) &&
+		     proto != htons(ETH_P_IPV6)))
+		return -ENOTSUPP;
+
+	len_cur = skb->len - skb_network_offset(skb);
+	if (skb_transport_header_was_set(skb) && !trans_same)
+		len_cur = skb_network_header_len(skb);
+	if ((shrink && (len_diff_abs >= len_cur ||
+			len_cur - len_diff_abs < len_min)) ||
+	    (!shrink && (skb->len + len_diff_abs > len_max &&
+			 !skb_is_gso(skb))))
+		return -ENOTSUPP;
+
+	ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
+		       bpf_skb_net_grow(skb, len_diff_abs);
+
+	bpf_compute_data_end(skb);
+	return 0;
+}
+
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+	   u32, mode, u64, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	if (likely(mode == BPF_ADJ_ROOM_NET))
+		return bpf_skb_adjust_net(skb, len_diff);
+
+	return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
+	.func		= bpf_skb_adjust_room,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 {
 	u32 min_len = skb_network_offset(skb);
@@ -2165,11 +2284,6 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 	return min_len;
 }
 
-static u32 __bpf_skb_max_len(const struct sk_buff *skb)
-{
-	return skb->dev->mtu + skb->dev->hard_header_len;
-}
-
 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
 {
 	unsigned int old_len = skb->len;
@@ -2306,6 +2420,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
 	    func == bpf_skb_change_tail ||
+	    func == bpf_skb_adjust_room ||
 	    func == bpf_skb_pull_data ||
 	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
@@ -2672,6 +2787,109 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	struct sock *sk = bpf_sock->sk;
+	int ret = 0;
+	int val;
+
+	if (!sk_fullsock(sk))
+		return -EINVAL;
+
+	if (level == SOL_SOCKET) {
+		if (optlen != sizeof(int))
+			return -EINVAL;
+		val = *((int *)optval);
+
+		/* Only some socketops are supported */
+		switch (optname) {
+		case SO_RCVBUF:
+			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+			break;
+		case SO_SNDBUF:
+			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+			break;
+		case SO_MAX_PACING_RATE:
+			sk->sk_max_pacing_rate = val;
+			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
+						 sk->sk_max_pacing_rate);
+			break;
+		case SO_PRIORITY:
+			sk->sk_priority = val;
+			break;
+		case SO_RCVLOWAT:
+			if (val < 0)
+				val = INT_MAX;
+			sk->sk_rcvlowat = val ? : 1;
+			break;
+		case SO_MARK:
+			sk->sk_mark = val;
+			break;
+		default:
+			ret = -EINVAL;
+		}
+#ifdef CONFIG_INET
+	} else if (level == SOL_TCP &&
+		   sk->sk_prot->setsockopt == tcp_setsockopt) {
+		if (optname == TCP_CONGESTION) {
+			char name[TCP_CA_NAME_MAX];
+
+			strncpy(name, optval, min_t(long, optlen,
+						    TCP_CA_NAME_MAX-1));
+			name[TCP_CA_NAME_MAX-1] = 0;
+			ret = tcp_set_congestion_control(sk, name, false);
+			if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
+				/* replacing an existing ca */
+				tcp_reinit_congestion_control(sk,
+					inet_csk(sk)->icsk_ca_ops);
+		} else {
+			struct tcp_sock *tp = tcp_sk(sk);
+
+			if (optlen != sizeof(int))
+				return -EINVAL;
+
+			val = *((int *)optval);
+			/* Only some options are supported */
+			switch (optname) {
+			case TCP_BPF_IW:
+				if (val <= 0 || tp->data_segs_out > 0)
+					ret = -EINVAL;
+				else
+					tp->snd_cwnd = val;
+				break;
+			case TCP_BPF_SNDCWND_CLAMP:
+				if (val <= 0) {
+					ret = -EINVAL;
+				} else {
+					tp->snd_cwnd_clamp = val;
+					tp->snd_ssthresh = val;
+				}
+			default:
+				ret = -EINVAL;
+			}
+		}
+		ret = -EINVAL;
+#endif
+	} else {
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_setsockopt_proto = {
+	.func		= bpf_setsockopt,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -2745,6 +2963,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
 		return &bpf_skb_change_type_proto;
+	case BPF_FUNC_skb_adjust_room:
+		return &bpf_skb_adjust_room_proto;
 	case BPF_FUNC_skb_change_tail:
 		return &bpf_skb_change_tail_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
@@ -2823,6 +3043,17 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
+	sock_ops_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_setsockopt:
+		return &bpf_setsockopt_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *
 lwt_xmit_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -2857,38 +3088,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info)
+static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+				    struct bpf_insn_access_aux *info)
 {
-	info->ctx_field_size = 4;
-	switch (off) {
-	case offsetof(struct __sk_buff, pkt_type) ...
-	     offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, vlan_present) ...
-	     offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1:
-		info->converted_op_size = 1;
-		break;
-	case offsetof(struct __sk_buff, queue_mapping) ...
-	     offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, protocol) ...
-	     offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, vlan_tci) ...
-	     offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, vlan_proto) ...
-	     offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, tc_index) ...
-	     offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, tc_classid) ...
-	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
-		info->converted_op_size = 2;
-		break;
-	default:
-		info->converted_op_size = 4;
-	}
-}
+	const int size_default = sizeof(__u32);
 
-static bool __is_valid_access(int off, int size, enum bpf_access_type type,
-			      struct bpf_insn_access_aux *info)
-{
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
 
@@ -2897,40 +3101,24 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type,
 		return false;
 
 	switch (off) {
-	case offsetof(struct __sk_buff, cb[0]) ...
-	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
-		if (off + size >
-		    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
+	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+		if (off + size > offsetofend(struct __sk_buff, cb[4]))
 			return false;
 		break;
-	case offsetof(struct __sk_buff, data) ...
-	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
-		if (size != sizeof(__u32))
+	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		if (size != size_default)
 			return false;
-		info->reg_type = PTR_TO_PACKET;
-		break;
-	case offsetof(struct __sk_buff, data_end) ...
-	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
-		if (size != sizeof(__u32))
-			return false;
-		info->reg_type = PTR_TO_PACKET_END;
 		break;
 	default:
+		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
-			if (size != sizeof(__u32))
+			if (size != size_default)
 				return false;
 		} else {
-			int allowed;
-
-			/* permit narrower load for not cb/data/data_end fields */
-#ifdef __LITTLE_ENDIAN
-			allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0;
-#else
-			allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0;
-#endif
-			if (!allowed)
+			bpf_ctx_record_field_size(info, size_default);
+			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
 				return false;
-			__set_access_aux_info(off, info);
 		}
 	}
 
@@ -2942,26 +3130,22 @@ static bool sk_filter_is_valid_access(int off, int size,
 				      struct bpf_insn_access_aux *info)
 {
 	switch (off) {
-	case offsetof(struct __sk_buff, tc_classid) ...
-	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, data) ...
-	     offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
-	case offsetof(struct __sk_buff, data_end) ...
-	     offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_end):
 		return false;
 	}
 
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		default:
 			return false;
 		}
 	}
 
-	return __is_valid_access(off, size, type, info);
+	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
 static bool lwt_is_valid_access(int off, int size,
@@ -2969,24 +3153,31 @@ static bool lwt_is_valid_access(int off, int size,
 				struct bpf_insn_access_aux *info)
 {
 	switch (off) {
-	case offsetof(struct __sk_buff, tc_classid) ...
-	     offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
 		return false;
 	}
 
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct __sk_buff, mark):
-		case offsetof(struct __sk_buff, priority):
-		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		default:
 			return false;
 		}
 	}
 
-	return __is_valid_access(off, size, type, info);
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
@@ -3058,19 +3249,27 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
-		case offsetof(struct __sk_buff, mark):
-		case offsetof(struct __sk_buff, tc_index):
-		case offsetof(struct __sk_buff, priority):
-		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
-		case offsetof(struct __sk_buff, tc_classid):
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, tc_index):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range(struct __sk_buff, tc_classid):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		default:
 			return false;
 		}
 	}
 
-	return __is_valid_access(off, size, type, info);
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
 static bool __is_valid_xdp_access(int off, int size)
@@ -3110,101 +3309,141 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+static bool __is_valid_sock_ops_access(int off, int size)
+{
+	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
+		return false;
+	/* The verifier guarantees that size > 0. */
+	if (off % size != 0)
+		return false;
+	if (size != sizeof(__u32))
+		return false;
+
+	return true;
+}
+
+static bool sock_ops_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct bpf_sock_ops, op) ...
+		     offsetof(struct bpf_sock_ops, replylong[3]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	return __is_valid_sock_ops_access(off, size);
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
-				  struct bpf_prog *prog)
+				  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, len):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, len));
+				      bpf_target_off(struct sk_buff, len, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, protocol):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, protocol));
+				      bpf_target_off(struct sk_buff, protocol, 2,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, vlan_proto):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
-
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, vlan_proto));
+				      bpf_target_off(struct sk_buff, vlan_proto, 2,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, priority):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
-
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, priority));
+					      bpf_target_off(struct sk_buff, priority, 4,
+							     target_size));
 		else
 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, priority));
+					      bpf_target_off(struct sk_buff, priority, 4,
+							     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, ingress_ifindex):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, skb_iif));
+				      bpf_target_off(struct sk_buff, skb_iif, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, ifindex):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct sk_buff, dev));
 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
-				      offsetof(struct net_device, ifindex));
+				      bpf_target_off(struct net_device, ifindex, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, hash):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, hash));
+				      bpf_target_off(struct sk_buff, hash, 4,
+						     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, mark):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, mark));
+					      bpf_target_off(struct sk_buff, mark, 4,
+							     target_size));
 		else
 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, mark));
+					      bpf_target_off(struct sk_buff, mark, 4,
+							     target_size));
 		break;
 
 	case offsetof(struct __sk_buff, pkt_type):
-		return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
-					  si->src_reg, insn);
+		*target_size = 1;
+		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+				      PKT_TYPE_OFFSET());
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
+#endif
+		break;
 
 	case offsetof(struct __sk_buff, queue_mapping):
-		return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
-					  si->src_reg, insn);
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+				      bpf_target_off(struct sk_buff, queue_mapping, 2,
+						     target_size));
+		break;
 
 	case offsetof(struct __sk_buff, vlan_present):
-		return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
-					  si->dst_reg, si->src_reg, insn);
-
 	case offsetof(struct __sk_buff, vlan_tci):
-		return convert_skb_access(SKF_AD_VLAN_TAG,
-					  si->dst_reg, si->src_reg, insn);
+		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+				      bpf_target_off(struct sk_buff, vlan_tci, 2,
+						     target_size));
+		if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
+			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
+						~VLAN_TAG_PRESENT);
+		} else {
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+		}
+		break;
 
 	case offsetof(struct __sk_buff, cb[0]) ...
-	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+	     offsetofend(struct __sk_buff, cb[4]) - 1:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
 		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
 			      offsetof(struct qdisc_skb_cb, data)) %
@@ -3230,6 +3469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		off -= offsetof(struct __sk_buff, tc_classid);
 		off += offsetof(struct sk_buff, cb);
 		off += offsetof(struct qdisc_skb_cb, tc_classid);
+		*target_size = 2;
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
 					      si->src_reg, off);
@@ -3255,14 +3495,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 
 	case offsetof(struct __sk_buff, tc_index):
 #ifdef CONFIG_NET_SCHED
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
-
 		if (type == BPF_WRITE)
 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, tc_index));
+					      bpf_target_off(struct sk_buff, tc_index, 2,
+							     target_size));
 		else
 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-					      offsetof(struct sk_buff, tc_index));
+					      bpf_target_off(struct sk_buff, tc_index, 2,
+							     target_size));
 #else
 		if (type == BPF_WRITE)
 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
@@ -3273,10 +3513,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 
 	case offsetof(struct __sk_buff, napi_id):
 #if defined(CONFIG_NET_RX_BUSY_POLL)
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sk_buff, napi_id));
+				      bpf_target_off(struct sk_buff, napi_id, 4,
+						     target_size));
 		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #else
@@ -3291,7 +3530,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 					  const struct bpf_insn *si,
 					  struct bpf_insn *insn_buf,
-					  struct bpf_prog *prog)
+					  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -3335,22 +3574,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
 					 const struct bpf_insn *si,
 					 struct bpf_insn *insn_buf,
-					 struct bpf_prog *prog)
+					 struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, ifindex):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct sk_buff, dev));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
-				      offsetof(struct net_device, ifindex));
+				      bpf_target_off(struct net_device, ifindex, 4,
+						     target_size));
 		break;
 	default:
-		return bpf_convert_ctx_access(type, si, insn_buf, prog);
+		return bpf_convert_ctx_access(type, si, insn_buf, prog,
+					      target_size);
 	}
 
 	return insn - insn_buf;
@@ -3359,7 +3598,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
 static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
-				  struct bpf_prog *prog)
+				  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -3379,6 +3618,139 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
+				       const struct bpf_insn *si,
+				       struct bpf_insn *insn_buf,
+				       struct bpf_prog *prog,
+				       u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+	int off;
+
+	switch (si->off) {
+	case offsetof(struct bpf_sock_ops, op) ...
+	     offsetof(struct bpf_sock_ops, replylong[3]):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
+			     FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
+		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
+			     FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
+		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
+			     FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+		off = si->off;
+		off -= offsetof(struct bpf_sock_ops, op);
+		off += offsetof(struct bpf_sock_ops_kern, op);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+					      off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+					      off);
+		break;
+
+	case offsetof(struct bpf_sock_ops, family):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+					      struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_family));
+		break;
+
+	case offsetof(struct bpf_sock_ops, remote_ip4):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_daddr));
+		break;
+
+	case offsetof(struct bpf_sock_ops, local_ip4):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+					      struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_rcv_saddr));
+		break;
+
+	case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
+	     offsetof(struct bpf_sock_ops, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_v6_daddr.s6_addr32[0]) != 4);
+
+		off = si->off;
+		off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_v6_daddr.s6_addr32[0]) +
+				      off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
+	     offsetof(struct bpf_sock_ops, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+		off = si->off;
+		off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_v6_rcv_saddr.s6_addr32[0]) +
+				      off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock_ops, remote_port):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_dport));
+#ifndef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock_ops, local_port):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_num));
+		break;
+	}
+	return insn - insn_buf;
+}
+
 const struct bpf_verifier_ops sk_filter_prog_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
@@ -3428,6 +3800,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = {
 	.convert_ctx_access	= sock_filter_convert_ctx_access,
 };
 
+const struct bpf_verifier_ops sock_ops_prog_ops = {
+	.get_func_proto		= sock_ops_func_proto,
+	.is_valid_access	= sock_ops_is_valid_access,
+	.convert_ctx_access	= sock_ops_convert_ctx_access,
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index dadb5eef91c3..e31fc11a8000 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -124,7 +124,7 @@ static bool neigh_del(struct neighbour *n, __u8 state,
 	bool retval = false;
 
 	write_lock(&n->lock);
-	if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
 		struct neighbour *neigh;
 
 		neigh = rcu_dereference_protected(n->next,
@@ -254,7 +254,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 			neigh_del_timer(n);
 			n->dead = 1;
 
-			if (atomic_read(&n->refcnt) != 1) {
+			if (refcount_read(&n->refcnt) != 1) {
 				/* The most unpleasant situation.
 				   We must destroy neighbour entry,
 				   but someone still uses it.
@@ -335,7 +335,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
 
 	NEIGH_CACHE_STAT_INC(tbl, allocs);
 	n->tbl		  = tbl;
-	atomic_set(&n->refcnt, 1);
+	refcount_set(&n->refcnt, 1);
 	n->dead		  = 1;
 out:
 	return n;
@@ -444,7 +444,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 	rcu_read_lock_bh();
 	n = __neigh_lookup_noref(tbl, pkey, dev);
 	if (n) {
-		if (!atomic_inc_not_zero(&n->refcnt))
+		if (!refcount_inc_not_zero(&n->refcnt))
 			n = NULL;
 		NEIGH_CACHE_STAT_INC(tbl, hits);
 	}
@@ -473,7 +473,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 	     n = rcu_dereference_bh(n->next)) {
 		if (!memcmp(n->primary_key, pkey, key_len) &&
 		    net_eq(dev_net(n->dev), net)) {
-			if (!atomic_inc_not_zero(&n->refcnt))
+			if (!refcount_inc_not_zero(&n->refcnt))
 				n = NULL;
 			NEIGH_CACHE_STAT_INC(tbl, hits);
 			break;
@@ -709,7 +709,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms);
 
 static inline void neigh_parms_put(struct neigh_parms *parms)
 {
-	if (atomic_dec_and_test(&parms->refcnt))
+	if (refcount_dec_and_test(&parms->refcnt))
 		neigh_parms_destroy(parms);
 }
 
@@ -821,7 +821,7 @@ static void neigh_periodic_work(struct work_struct *work)
 			if (time_before(n->used, n->confirmed))
 				n->used = n->confirmed;
 
-			if (atomic_read(&n->refcnt) == 1 &&
+			if (refcount_read(&n->refcnt) == 1 &&
 			    (state == NUD_FAILED ||
 			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
 				*np = n->next;
@@ -1479,7 +1479,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
 	if (p) {
 		p->tbl		  = tbl;
-		atomic_set(&p->refcnt, 1);
+		refcount_set(&p->refcnt, 1);
 		p->reachable_time =
 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
 		dev_hold(dev);
@@ -1542,7 +1542,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
 	INIT_LIST_HEAD(&tbl->parms_list);
 	list_add(&tbl->parms.list, &tbl->parms_list);
 	write_pnet(&tbl->parms.net, &init_net);
-	atomic_set(&tbl->parms.refcnt, 1);
+	refcount_set(&tbl->parms.refcnt, 1);
 	tbl->parms.reachable_time =
 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
 
@@ -1796,7 +1796,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 
 	if ((parms->dev &&
 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
-	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
+	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
 	    /* approximative value for deprecated QUEUE_LEN (in packets) */
@@ -2234,7 +2234,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
-	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
+	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
 	read_unlock_bh(&neigh->lock);
 
 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 58e6cc70500d..b4f9922b6f23 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -626,7 +626,7 @@ static struct attribute *netstat_attrs[] = {
 };
 
 
-static struct attribute_group netstat_group = {
+static const struct attribute_group netstat_group = {
 	.name  = "statistics",
 	.attrs  = netstat_attrs,
 };
@@ -636,7 +636,7 @@ static struct attribute *wireless_attrs[] = {
 	NULL
 };
 
-static struct attribute_group wireless_group = {
+static const struct attribute_group wireless_group = {
 	.name = "wireless",
 	.attrs = wireless_attrs,
 };
@@ -1204,7 +1204,7 @@ static struct attribute *dql_attrs[] = {
 	NULL
 };
 
-static struct attribute_group dql_group = {
+static const struct attribute_group dql_group = {
 	.name  = "byte_queue_limits",
 	.attrs  = dql_attrs,
 };
@@ -1448,7 +1448,7 @@ static void *net_grab_current_ns(void)
 	struct net *ns = current->nsproxy->net_ns;
 #ifdef CONFIG_NET_NS
 	if (ns)
-		atomic_inc(&ns->passive);
+		refcount_inc(&ns->passive);
 #endif
 	return ns;
 }
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2178db8e47cd..8726d051f31d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -284,7 +284,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	LIST_HEAD(net_exit_list);
 
 	atomic_set(&net->count, 1);
-	atomic_set(&net->passive, 1);
+	refcount_set(&net->passive, 1);
 	net->dev_base_seq = 1;
 	net->user_ns = user_ns;
 	idr_init(&net->netns_ids);
@@ -380,7 +380,7 @@ static void net_free(struct net *net)
 void net_drop_ns(void *p)
 {
 	struct net *ns = p;
-	if (ns && atomic_dec_and_test(&ns->passive))
+	if (ns && refcount_dec_and_test(&ns->passive))
 		net_free(ns);
 }
 
@@ -501,6 +501,23 @@ static void cleanup_net(struct work_struct *work)
 		net_drop_ns(net);
 	}
 }
+
+/**
+ * net_ns_barrier - wait until concurrent net_cleanup_work is done
+ *
+ * cleanup_net runs from work queue and will first remove namespaces
+ * from the global list, then run net exit functions.
+ *
+ * Call this in module exit path to make sure that all netns
+ * ->exit ops have been invoked before the function is removed.
+ */
+void net_ns_barrier(void)
+{
+	mutex_lock(&net_mutex);
+	mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL(net_ns_barrier);
+
 static DECLARE_WORK(net_cleanup_work, cleanup_net);
 
 void __put_net(struct net *net)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 37c1e34ddd85..d3408a693166 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -277,7 +277,7 @@ static void zap_completion_queue(void)
 			struct sk_buff *skb = clist;
 			clist = clist->next;
 			if (!skb_irq_freeable(skb)) {
-				atomic_inc(&skb->users);
+				refcount_inc(&skb->users);
 				dev_kfree_skb_any(skb); /* put this one back */
 			} else {
 				__kfree_skb(skb);
@@ -309,7 +309,7 @@ repeat:
 		return NULL;
 	}
 
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 	skb_reserve(skb, reserve);
 	return skb;
 }
@@ -632,7 +632,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
-		atomic_set(&npinfo->refcnt, 1);
+		refcount_set(&npinfo->refcnt, 1);
 
 		ops = np->dev->netdev_ops;
 		if (ops->ndo_netpoll_setup) {
@@ -642,7 +642,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 		}
 	} else {
 		npinfo = rtnl_dereference(ndev->npinfo);
-		atomic_inc(&npinfo->refcnt);
+		refcount_inc(&npinfo->refcnt);
 	}
 
 	npinfo->netpoll = np;
@@ -821,7 +821,7 @@ void __netpoll_cleanup(struct netpoll *np)
 
 	synchronize_srcu(&netpoll_srcu);
 
-	if (atomic_dec_and_test(&npinfo->refcnt)) {
+	if (refcount_dec_and_test(&npinfo->refcnt)) {
 		const struct net_device_ops *ops;
 
 		ops = np->dev->netdev_ops;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2dd42c5b0366..6e1e10ff433a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3363,7 +3363,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 {
 	ktime_t idle_start = ktime_get();
 
-	while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+	while (refcount_read(&(pkt_dev->skb->users)) != 1) {
 		if (signal_pending(current))
 			break;
 
@@ -3420,7 +3420,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
 		skb = pkt_dev->skb;
 		skb->protocol = eth_type_trans(skb, skb->dev);
-		atomic_add(burst, &skb->users);
+		refcount_add(burst, &skb->users);
 		local_bh_disable();
 		do {
 			ret = netif_receive_skb(skb);
@@ -3428,11 +3428,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 				pkt_dev->errors++;
 			pkt_dev->sofar++;
 			pkt_dev->seq_num++;
-			if (atomic_read(&skb->users) != burst) {
+			if (refcount_read(&skb->users) != burst) {
 				/* skb was queued by rps/rfs or taps,
 				 * so cannot reuse this skb
 				 */
-				atomic_sub(burst - 1, &skb->users);
+				WARN_ON(refcount_sub_and_test(burst - 1, &skb->users));
 				/* get out of the loop and wait
 				 * until skb is consumed
 				 */
@@ -3446,7 +3446,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		goto out; /* Skips xmit_mode M_START_XMIT */
 	} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
 		local_bh_disable();
-		atomic_inc(&pkt_dev->skb->users);
+		refcount_inc(&pkt_dev->skb->users);
 
 		ret = dev_queue_xmit(pkt_dev->skb);
 		switch (ret) {
@@ -3487,7 +3487,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		pkt_dev->last_ok = 0;
 		goto unlock;
 	}
-	atomic_add(burst, &pkt_dev->skb->users);
+	refcount_add(burst, &pkt_dev->skb->users);
 
 xmit_more:
 	ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
@@ -3513,11 +3513,11 @@ xmit_more:
 		/* fallthru */
 	case NETDEV_TX_BUSY:
 		/* Retry it next time */
-		atomic_dec(&(pkt_dev->skb->users));
+		refcount_dec(&(pkt_dev->skb->users));
 		pkt_dev->last_ok = 0;
 	}
 	if (unlikely(burst))
-		atomic_sub(burst, &pkt_dev->skb->users);
+		WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users));
 unlock:
 	HARD_TX_UNLOCK(odev, txq);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ed51de525a88..d1ba90980be1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -649,7 +649,7 @@ int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int g
 
 	NETLINK_CB(skb).dst_group = group;
 	if (echo)
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
 	if (echo)
 		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f75897a33fa4..8b11341ed69a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->head = NULL;
 	skb->truesize = sizeof(struct sk_buff);
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 
 	skb->mac_header = (typeof(skb->mac_header))~0U;
 out:
@@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
 	skb->pfmemalloc = pfmemalloc;
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
@@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(&fclones->fclone_ref, 1);
+		refcount_set(&fclones->fclone_ref, 1);
 
 		fclones->skb2.fclone = SKB_FCLONE_CLONE;
 	}
@@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = SKB_TRUESIZE(size);
-	atomic_set(&skb->users, 1);
+	refcount_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
@@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 		 * This test would have no chance to be true for the clone,
 		 * while here, branch prediction will be good.
 		 */
-		if (atomic_read(&fclones->fclone_ref) == 1)
+		if (refcount_read(&fclones->fclone_ref) == 1)
 			goto fastpath;
 		break;
 
@@ -637,7 +637,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 		break;
 	}
-	if (!atomic_dec_and_test(&fclones->fclone_ref))
+	if (!refcount_dec_and_test(&fclones->fclone_ref))
 		return;
 fastpath:
 	kmem_cache_free(skbuff_fclone_cache, fclones);
@@ -915,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head_frag);
 	C(data);
 	C(truesize);
-	atomic_set(&n->users, 1);
+	refcount_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
@@ -1027,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		return NULL;
 
 	if (skb->fclone == SKB_FCLONE_ORIG &&
-	    atomic_read(&fclones->fclone_ref) == 1) {
+	    refcount_read(&fclones->fclone_ref) == 1) {
 		n = &fclones->skb2;
-		atomic_set(&fclones->fclone_ref, 2);
+		refcount_set(&fclones->fclone_ref, 2);
 	} else {
 		if (skb_pfmemalloc(skb))
 			gfp_mask |= __GFP_MEMALLOC;
@@ -3024,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 		get_page(pfrag->page);
 
 		skb->truesize += copy;
-		atomic_add(copy, &sk->sk_wmem_alloc);
+		refcount_add(copy, &sk->sk_wmem_alloc);
 		skb->len += copy;
 		skb->data_len += copy;
 		offset += copy;
@@ -3844,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb)
 	struct sock *sk = skb->sk;
 	struct sk_buff *clone;
 
-	if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
 		return NULL;
 
 	clone = skb_clone(skb, GFP_ATOMIC);
@@ -3915,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	/* Take a reference to prevent skb_orphan() from freeing the socket,
 	 * but only if the socket refcount is not zero.
 	 */
-	if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
 		*skb_hwtstamps(skb) = *hwtstamps;
 		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
 		sock_put(sk);
@@ -3997,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
 	/* Take a reference to prevent skb_orphan() from freeing the socket,
 	 * but only if the socket refcount is not zero.
 	 */
-	if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
 		err = sock_queue_err_skb(sk, skb);
 		sock_put(sk);
 	}
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f4b090241c1..ba0ef6a7dbaf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1528,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		if (likely(sk->sk_net_refcnt))
 			get_net(net);
 		sock_net_set(sk, net);
-		atomic_set(&sk->sk_wmem_alloc, 1);
+		refcount_set(&sk->sk_wmem_alloc, 1);
 
 		mem_cgroup_sk_alloc(sk);
 		cgroup_sk_alloc(&sk->sk_cgrp_data);
@@ -1552,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head)
 		sk->sk_destruct(sk);
 
 	filter = rcu_dereference_check(sk->sk_filter,
-				       atomic_read(&sk->sk_wmem_alloc) == 0);
+				       refcount_read(&sk->sk_wmem_alloc) == 0);
 	if (filter) {
 		sk_filter_uncharge(sk, filter);
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
@@ -1602,7 +1602,7 @@ void sk_free(struct sock *sk)
 	 * some packets are still in some tx queue.
 	 * If not null, sock_wfree() will call __sk_free(sk) later
 	 */
-	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 EXPORT_SYMBOL(sk_free);
@@ -1659,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		/*
 		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
 		 */
-		atomic_set(&newsk->sk_wmem_alloc, 1);
+		refcount_set(&newsk->sk_wmem_alloc, 1);
 		atomic_set(&newsk->sk_omem_alloc, 0);
 		sk_init_common(newsk);
 
@@ -1708,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		 * (Documentation/RCU/rculist_nulls.txt for details)
 		 */
 		smp_wmb();
-		atomic_set(&newsk->sk_refcnt, 2);
+		refcount_set(&newsk->sk_refcnt, 2);
 
 		/*
 		 * Increment the counter in the same struct proto as the master
@@ -1787,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb)
 		 * Keep a reference on sk_wmem_alloc, this will be released
 		 * after sk_write_space() call
 		 */
-		atomic_sub(len - 1, &sk->sk_wmem_alloc);
+		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
 		sk->sk_write_space(sk);
 		len = 1;
 	}
@@ -1795,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb)
 	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
 	 * could not do because of in-flight packets
 	 */
-	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 EXPORT_SYMBOL(sock_wfree);
@@ -1807,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
-	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
 		__sk_free(sk);
 }
 
@@ -1829,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 	 * is enough to guarantee sk_free() wont free this sock until
 	 * all in-flight packets are completed
 	 */
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
@@ -1851,8 +1851,8 @@ void skb_orphan_partial(struct sk_buff *skb)
 		) {
 		struct sock *sk = skb->sk;
 
-		if (atomic_inc_not_zero(&sk->sk_refcnt)) {
-			atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
 			skb->destructor = sock_efree;
 		}
 	} else {
@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino);
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
 		struct sk_buff *skb = alloc_skb(size, priority);
 		if (skb) {
 			skb_set_owner_w(skb, sk);
@@ -1987,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
 			break;
@@ -2310,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 		if (sk->sk_type == SOCK_STREAM) {
 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
 				return 1;
-		} else if (atomic_read(&sk->sk_wmem_alloc) <
+		} else if (refcount_read(&sk->sk_wmem_alloc) <
 			   prot->sysctl_wmem[0])
 				return 1;
 	}
@@ -2577,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk)
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
@@ -2687,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	 * (Documentation/RCU/rculist_nulls.txt for details)
 	 */
 	smp_wmb();
-	atomic_set(&sk->sk_refcnt, 1);
+	refcount_set(&sk->sk_refcnt, 1);
 	atomic_set(&sk->sk_drops, 0);
 }
 EXPORT_SYMBOL(sock_init_data);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4fccc0c37fbd..c376af5bfdfb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -353,7 +353,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
 	ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index eeb5fc561f80..21dedf6fd0f7 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -559,7 +559,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq,
 		   (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
 		   (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
 		   dn->n.nud_state,
-		   atomic_read(&dn->n.refcnt),
+		   refcount_read(&dn->n.refcnt),
 		   dn->blksize,
 		   (dn->n.dev) ? dn->n.dev->name : "?");
 	read_unlock(&n->lock);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 58925b6597de..76c2077c3f5b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(sk->sk_wmem_queued);
 	WARN_ON(sk->sk_forward_alloc);
 
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ae206163c273..c2044775ae7d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -265,7 +265,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
 		    entry->key_len == key_len &&
 		    memcmp(entry->key, key, key_len) == 0) {
 			entry->activity += 1;
-			atomic_inc(&entry->lsm_data->refcount);
+			refcount_inc(&entry->lsm_data->refcount);
 			secattr->cache = entry->lsm_data;
 			secattr->flags |= NETLBL_SECATTR_CACHE;
 			secattr->type = NETLBL_NLTYPE_CIPSOV4;
@@ -332,7 +332,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
 	}
 	entry->key_len = cipso_ptr_len;
 	entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
-	atomic_inc(&secattr->cache->refcount);
+	refcount_inc(&secattr->cache->refcount);
 	entry->lsm_data = secattr->cache;
 
 	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a7dd088d5fc9..38d9af9b917c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -252,7 +252,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	/* Reference in_dev->dev */
 	dev_hold(dev);
 	/* Account for reference dev->ip_ptr (below) */
-	in_dev_hold(in_dev);
+	refcount_set(&in_dev->refcnt, 1);
 
 	err = devinet_sysctl_register(in_dev);
 	if (err) {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1f18b4650253..0cbee0a666ff 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 			skb->data_len += tailen;
 			skb->truesize += tailen;
 			if (sk)
-				atomic_add(tailen, &sk->sk_wmem_alloc);
+				refcount_add(tailen, &sk->sk_wmem_alloc);
 
 			goto out;
 		}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c4032302d7cd..28f14afd0dd3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -173,7 +173,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 static void ip_ma_put(struct ip_mc_list *im)
 {
-	if (atomic_dec_and_test(&im->refcnt)) {
+	if (refcount_dec_and_test(&im->refcnt)) {
 		in_dev_put(im->interface);
 		kfree_rcu(im, rcu);
 	}
@@ -199,7 +199,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
 {
 	spin_lock_bh(&im->lock);
 	if (del_timer(&im->timer))
-		atomic_dec(&im->refcnt);
+		refcount_dec(&im->refcnt);
 	im->tm_running = 0;
 	im->reporter = 0;
 	im->unsolicit_count = 0;
@@ -213,7 +213,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
 
 	im->tm_running = 1;
 	if (!mod_timer(&im->timer, jiffies+tv+2))
-		atomic_inc(&im->refcnt);
+		refcount_inc(&im->refcnt);
 }
 
 static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -249,7 +249,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
 			spin_unlock_bh(&im->lock);
 			return;
 		}
-		atomic_dec(&im->refcnt);
+		refcount_dec(&im->refcnt);
 	}
 	igmp_start_timer(im, max_delay);
 	spin_unlock_bh(&im->lock);
@@ -1374,7 +1374,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	/* initial mode is (EX, empty) */
 	im->sfmode = MCAST_EXCLUDE;
 	im->sfcount[MCAST_EXCLUDE] = 1;
-	atomic_set(&im->refcnt, 1);
+	refcount_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
 	setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a3fa1a5b6d98..4089c013cb03 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -756,7 +756,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
 	 * are committed to memory and refcnt initialized.
 	 */
 	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 2 + 1);
+	refcount_set(&req->rsk_refcnt, 2 + 1);
 }
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b5e9317eaf9e..96e95e83cc61 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -276,11 +276,11 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 {
 	if (del_timer(&fq->timer))
-		atomic_dec(&fq->refcnt);
+		refcount_dec(&fq->refcnt);
 
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
 		fq_unlink(fq, f);
-		atomic_dec(&fq->refcnt);
+		refcount_dec(&fq->refcnt);
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
@@ -329,7 +329,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 	 */
 	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
-			atomic_inc(&qp->refcnt);
+			refcount_inc(&qp->refcnt);
 			spin_unlock(&hb->chain_lock);
 			qp_in->flags |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
@@ -339,9 +339,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 #endif
 	qp = qp_in;
 	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
-		atomic_inc(&qp->refcnt);
+		refcount_inc(&qp->refcnt);
 
-	atomic_inc(&qp->refcnt);
+	refcount_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &hb->chain);
 
 	spin_unlock(&hb->chain_lock);
@@ -370,7 +370,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
-	atomic_set(&q->refcnt, 1);
+	refcount_set(&q->refcnt, 1);
 
 	return q;
 }
@@ -405,7 +405,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	spin_lock(&hb->chain_lock);
 	hlist_for_each_entry(q, &hb->chain, list) {
 		if (q->net == nf && f->match(q, key)) {
-			atomic_inc(&q->refcnt);
+			refcount_inc(&q->refcnt);
 			spin_unlock(&hb->chain_lock);
 			return q;
 		}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e9a59d2d91d4..2e3389d614d1 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -43,7 +43,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
 /* This function handles inet_sock, but also timewait and request sockets
  * for IPv4/IPv6.
  */
-u32 sk_ehashfn(const struct sock *sk)
+static u32 sk_ehashfn(const struct sock *sk)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6 &&
@@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 /* All sockets share common refcount, but have different destructors */
 void sock_gen_put(struct sock *sk)
 {
-	if (!atomic_dec_and_test(&sk->sk_refcnt))
+	if (!refcount_dec_and_test(&sk->sk_refcnt))
 		return;
 
 	if (sk->sk_state == TCP_TIME_WAIT)
@@ -287,7 +287,7 @@ begin:
 			continue;
 		if (likely(INET_MATCH(sk, net, acookie,
 				      saddr, daddr, ports, dif))) {
-			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 				goto out;
 			if (unlikely(!INET_MATCH(sk, net, acookie,
 						 saddr, daddr, ports, dif))) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index f8aff2c71cde..5b039159e67a 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -76,7 +76,7 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
 
 void inet_twsk_put(struct inet_timewait_sock *tw)
 {
-	if (atomic_dec_and_test(&tw->tw_refcnt))
+	if (refcount_dec_and_test(&tw->tw_refcnt))
 		inet_twsk_free(tw);
 }
 EXPORT_SYMBOL_GPL(inet_twsk_put);
@@ -131,7 +131,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	 * We can use atomic_set() because prior spin_lock()/spin_unlock()
 	 * committed into memory all tw fields.
 	 */
-	atomic_set(&tw->tw_refcnt, 4);
+	refcount_set(&tw->tw_refcnt, 4);
 	inet_twsk_add_node_rcu(tw, &ehead->chain);
 
 	/* Step 3: Remove SK from hash chain */
@@ -195,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 		 * to a non null value before everything is setup for this
 		 * timewait socket.
 		 */
-		atomic_set(&tw->tw_refcnt, 0);
+		refcount_set(&tw->tw_refcnt, 0);
 
 		__module_get(tw->tw_prot->owner);
 	}
@@ -278,7 +278,7 @@ restart:
 				atomic_read(&twsk_net(tw)->count))
 				continue;
 
-			if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
+			if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
 				continue;
 
 			if (unlikely((tw->tw_family != family) ||
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 86fa45809540..c5a117cc6619 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -115,7 +115,7 @@ static void inetpeer_gc_worker(struct work_struct *work)
 
 		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
 
-		if (!atomic_read(&p->refcnt)) {
+		if (refcount_read(&p->refcnt) == 1) {
 			list_del(&p->gc_list);
 			kmem_cache_free(peer_cachep, p);
 		}
@@ -202,10 +202,11 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
 		int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
 		if (cmp == 0) {
 			/* Before taking a reference, check if this entry was
-			 * deleted (refcnt=-1)
+			 * deleted (refcnt=0)
 			 */
-			if (!atomic_add_unless(&u->refcnt, 1, -1))
+			if (!refcount_inc_not_zero(&u->refcnt)) {
 				u = NULL;
+			}
 			return u;
 		}
 		if (cmp == -1)
@@ -382,11 +383,10 @@ static int inet_peer_gc(struct inet_peer_base *base,
 	while (stackptr > stack) {
 		stackptr--;
 		p = rcu_deref_locked(**stackptr, base);
-		if (atomic_read(&p->refcnt) == 0) {
+		if (refcount_read(&p->refcnt) == 1) {
 			smp_rmb();
 			delta = (__u32)jiffies - p->dtime;
-			if (delta >= ttl &&
-			    atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
+			if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
 				p->gc_next = gchead;
 				gchead = p;
 			}
@@ -432,7 +432,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 relookup:
 	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
-		atomic_inc(&p->refcnt);
+		refcount_inc(&p->refcnt);
 		write_sequnlock_bh(&base->lock);
 		return p;
 	}
@@ -444,7 +444,7 @@ relookup:
 	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
 	if (p) {
 		p->daddr = *daddr;
-		atomic_set(&p->refcnt, 1);
+		refcount_set(&p->refcnt, 2);
 		atomic_set(&p->rid, 0);
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
@@ -468,7 +468,7 @@ void inet_putpeer(struct inet_peer *p)
 {
 	p->dtime = (__u32)jiffies;
 	smp_mb__before_atomic();
-	atomic_dec(&p->refcnt);
+	refcount_dec(&p->refcnt);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b3cdeec85f1f..9a8cfac503dc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -312,7 +312,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	unsigned int sum_truesize = 0;
 
 	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
-		atomic_inc(&qp->q.refcnt);
+		refcount_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7a3fd25e8913..2e61e2af251a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+	if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
@@ -1036,7 +1037,7 @@ alloc_new_skb:
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
-				if (atomic_read(&sk->sk_wmem_alloc) <=
+				if (refcount_read(&sk->sk_wmem_alloc) <=
 				    2 * sk->sk_sndbuf)
 					skb = sock_wmalloc(sk,
 							   alloclen + hh_len + 15, 1,
@@ -1144,7 +1145,7 @@ alloc_new_skb:
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
-			atomic_add(copy, &sk->sk_wmem_alloc);
+			refcount_add(copy, &sk->sk_wmem_alloc);
 		}
 		offset += copy;
 		length -= copy;
@@ -1368,7 +1369,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 		skb->len += len;
 		skb->data_len += len;
 		skb->truesize += len;
-		atomic_add(len, &sk->sk_wmem_alloc);
+		refcount_add(len, &sk->sk_wmem_alloc);
 		offset += len;
 		size -= len;
 	}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1d521be612b..bb909f1d7537 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2406,6 +2406,67 @@ errout:
 	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
 }
 
+static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[RTA_MAX + 1];
+	struct sk_buff *skb = NULL;
+	struct mfc_cache *cache;
+	struct mr_table *mrt;
+	struct rtmsg *rtm;
+	__be32 src, grp;
+	u32 tableid;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
+			  rtm_ipv4_policy, extack);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(nlh);
+
+	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
+	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
+	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+
+	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
+	if (IS_ERR(mrt)) {
+		err = PTR_ERR(mrt);
+		goto errout_free;
+	}
+
+	/* entries are added/deleted only under RTNL */
+	rcu_read_lock();
+	cache = ipmr_cache_find(mrt, src, grp);
+	rcu_read_unlock();
+	if (!cache) {
+		err = -ENOENT;
+		goto errout_free;
+	}
+
+	skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto errout_free;
+	}
+
+	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
+			       nlh->nlmsg_seq, cache,
+			       RTM_NEWROUTE, 0);
+	if (err < 0)
+		goto errout_free;
+
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+
+errout:
+	return err;
+
+errout_free:
+	kfree_skb(skb);
+	goto errout;
+}
+
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -3053,7 +3114,7 @@ int __init ip_mr_init(void)
 	}
 #endif
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
-		      NULL, ipmr_rtm_dumproute, NULL);
+		      ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
 		      ipmr_rtm_route, NULL, NULL);
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 038f293c2376..7d72decb80f9 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -47,7 +47,7 @@ struct clusterip_config {
 
 	__be32 clusterip;			/* the IP address */
 	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
-	struct net_device *dev;			/* device */
+	int ifindex;				/* device ifindex */
 	u_int16_t num_total_nodes;		/* total number of nodes */
 	unsigned long local_nodes;		/* node number array */
 
@@ -57,6 +57,9 @@ struct clusterip_config {
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
 	struct rcu_head rcu;
+
+	char ifname[IFNAMSIZ];			/* device ifname */
+	struct notifier_block notifier;		/* refresh c->ifindex in it */
 };
 
 #ifdef CONFIG_PROC_FS
@@ -98,9 +101,8 @@ clusterip_config_put(struct clusterip_config *c)
  * entry(rule) is removed, remove the config from lists, but don't free it
  * yet, since proc-files could still be holding references */
 static inline void
-clusterip_config_entry_put(struct clusterip_config *c)
+clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
 {
-	struct net *net = dev_net(c->dev);
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 
 	local_bh_disable();
@@ -109,8 +111,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 		spin_unlock(&cn->lock);
 		local_bh_enable();
 
-		dev_mc_del(c->dev, c->clustermac);
-		dev_put(c->dev);
+		unregister_netdevice_notifier(&c->notifier);
 
 		/* In case anyone still accesses the file, the open/close
 		 * functions are also incrementing the refcount on their own,
@@ -170,19 +171,55 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
 		set_bit(i->local_nodes[n] - 1, &c->local_nodes);
 }
 
-static struct clusterip_config *
-clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
-		      struct net_device *dev)
+static int
+clusterip_netdev_event(struct notifier_block *this, unsigned long event,
+		       void *ptr)
 {
-	struct net *net = dev_net(dev);
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct clusterip_config *c;
+
+	c = container_of(this, struct clusterip_config, notifier);
+	switch (event) {
+	case NETDEV_REGISTER:
+		if (!strcmp(dev->name, c->ifname)) {
+			c->ifindex = dev->ifindex;
+			dev_mc_add(dev, c->clustermac);
+		}
+		break;
+	case NETDEV_UNREGISTER:
+		if (dev->ifindex == c->ifindex) {
+			dev_mc_del(dev, c->clustermac);
+			c->ifindex = -1;
+		}
+		break;
+	case NETDEV_CHANGENAME:
+		if (!strcmp(dev->name, c->ifname)) {
+			c->ifindex = dev->ifindex;
+			dev_mc_add(dev, c->clustermac);
+		} else if (dev->ifindex == c->ifindex) {
+			dev_mc_del(dev, c->clustermac);
+			c->ifindex = -1;
+		}
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct clusterip_config *
+clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
+		      __be32 ip, const char *iniface)
+{
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+	struct clusterip_config *c;
+	int err;
 
 	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c)
 		return ERR_PTR(-ENOMEM);
 
-	c->dev = dev;
+	strcpy(c->ifname, iniface);
+	c->ifindex = -1;
 	c->clusterip = ip;
 	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
 	c->num_total_nodes = i->num_total_nodes;
@@ -213,17 +250,27 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 					  cn->procdir,
 					  &clusterip_proc_fops, c);
 		if (!c->pde) {
-			spin_lock_bh(&cn->lock);
-			list_del_rcu(&c->list);
-			spin_unlock_bh(&cn->lock);
-			kfree(c);
-
-			return ERR_PTR(-ENOMEM);
+			err = -ENOMEM;
+			goto err;
 		}
 	}
 #endif
 
-	return c;
+	c->notifier.notifier_call = clusterip_netdev_event;
+	err = register_netdevice_notifier(&c->notifier);
+	if (!err)
+		return c;
+
+#ifdef CONFIG_PROC_FS
+	proc_remove(c->pde);
+err:
+#endif
+	spin_lock_bh(&cn->lock);
+	list_del_rcu(&c->list);
+	spin_unlock_bh(&cn->lock);
+	kfree(c);
+
+	return ERR_PTR(err);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -425,14 +472,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 					e->ip.iniface);
 				return -ENOENT;
 			}
+			dev_put(dev);
 
-			config = clusterip_config_init(cipinfo,
-							e->ip.dst.s_addr, dev);
-			if (IS_ERR(config)) {
-				dev_put(dev);
+			config = clusterip_config_init(par->net, cipinfo,
+						       e->ip.dst.s_addr,
+						       e->ip.iniface);
+			if (IS_ERR(config))
 				return PTR_ERR(config);
-			}
-			dev_mc_add(config->dev, config->clustermac);
 		}
 	}
 	cipinfo->config = config;
@@ -458,7 +504,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 
 	/* if no more entries are referencing the config, remove it
 	 * from the list and destroy the proc entry */
-	clusterip_config_entry_put(cipinfo->config);
+	clusterip_config_entry_put(par->net, cipinfo->config);
 
 	clusterip_config_put(cipinfo->config);
 
@@ -558,10 +604,9 @@ arp_mangle(void *priv,
 	 * addresses on different interfacs.  However, in the CLUSTERIP case
 	 * this wouldn't work, since we didn't subscribe the mcast group on
 	 * other interfaces */
-	if (c->dev != state->out) {
-		pr_debug("not mangling arp reply on different "
-			 "interface: cip'%s'-skb'%s'\n",
-			 c->dev->name, state->out->name);
+	if (c->ifindex != state->out->ifindex) {
+		pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
+			 c->ifindex, state->out->ifindex);
 		clusterip_config_put(c);
 		return NF_ACCEPT;
 	}
@@ -743,14 +788,20 @@ static const struct file_operations clusterip_proc_fops = {
 static int clusterip_net_init(struct net *net)
 {
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+	int ret;
 
 	INIT_LIST_HEAD(&cn->configs);
 
 	spin_lock_init(&cn->lock);
 
+	ret = nf_register_net_hook(net, &cip_arp_ops);
+	if (ret < 0)
+		return ret;
+
 #ifdef CONFIG_PROC_FS
 	cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
 	if (!cn->procdir) {
+		nf_unregister_net_hook(net, &cip_arp_ops);
 		pr_err("Unable to proc dir entry\n");
 		return -ENOMEM;
 	}
@@ -765,6 +816,7 @@ static void clusterip_net_exit(struct net *net)
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 	proc_remove(cn->procdir);
 #endif
+	nf_unregister_net_hook(net, &cip_arp_ops);
 }
 
 static struct pernet_operations clusterip_net_ops = {
@@ -786,17 +838,11 @@ static int __init clusterip_tg_init(void)
 	if (ret < 0)
 		goto cleanup_subsys;
 
-	ret = nf_register_hook(&cip_arp_ops);
-	if (ret < 0)
-		goto cleanup_target;
-
 	pr_info("ClusterIP Version %s loaded successfully\n",
 		CLUSTERIP_VERSION);
 
 	return 0;
 
-cleanup_target:
-	xt_unregister_target(&clusterip_tg_reg);
 cleanup_subsys:
 	unregister_pernet_subsys(&clusterip_net_ops);
 	return ret;
@@ -806,7 +852,6 @@ static void __exit clusterip_tg_exit(void)
 {
 	pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
 
-	nf_unregister_hook(&cip_arp_ops);
 	xt_unregister_target(&clusterip_tg_reg);
 	unregister_pernet_subsys(&clusterip_net_ops);
 
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index dc1dea15c1b4..f39037fca923 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -98,8 +98,8 @@ static int masq_device_event(struct notifier_block *this,
 		 */
 		NF_CT_ASSERT(dev->ifindex != 0);
 
-		nf_ct_iterate_cleanup(net, device_cmp,
-				      (void *)(long)dev->ifindex, 0, 0);
+		nf_ct_iterate_cleanup_net(net, device_cmp,
+					  (void *)(long)dev->ifindex, 0, 0);
 	}
 
 	return NOTIFY_DONE;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index ccfbce13a633..b8f0db54b197 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -290,7 +290,7 @@ void ping_close(struct sock *sk, long timeout)
 {
 	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
 		 inet_sk(sk), inet_sk(sk)->inet_num);
-	pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
+	pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt));
 
 	sk_common_release(sk);
 }
@@ -1127,7 +1127,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
 		0, 0L, 0,
 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
 		0, sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp,
+		refcount_read(&sp->sk_refcnt), sp,
 		atomic_read(&sp->sk_drops));
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bdffad875691..b0bb5d0a30bd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1063,7 +1063,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 		0, 0L, 0,
 		from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
 		0, sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
+		refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
 }
 
 static int raw_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 7835bb4a1fab..0905cf04c2a4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
 						 NULL, &own_req);
 	if (child) {
-		atomic_set(&req->rsk_refcnt, 1);
+		refcount_set(&req->rsk_refcnt, 1);
 		tcp_sk(child)->tsoffset = tsoff;
 		sock_rps_save_rxhash(child, skb);
 		inet_csk_reqsk_queue_add(sk, req, child);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 058f509ca98e..71ce33decd97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 	return skb->len < size_goal &&
 	       sysctl_tcp_autocorking &&
 	       skb != tcp_write_queue_head(sk) &&
-	       atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
 
 static void tcp_push(struct sock *sk, int flags, int mss_now,
@@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED.
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
+		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
 			return;
 	}
 
@@ -2350,6 +2350,8 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
+	dst_release(sk->sk_rx_dst);
+	sk->sk_rx_dst = NULL;
 	tcp_saved_syn_free(tp);
 
 	/* Clean up fastopen related fields */
@@ -2479,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		name[val] = 0;
 
 		lock_sock(sk);
-		err = tcp_set_congestion_control(sk, name);
+		err = tcp_set_congestion_control(sk, name, true);
 		release_sock(sk);
 		return err;
 	}
@@ -3062,6 +3064,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		if (get_user(len, optlen))
 			return -EFAULT;
 		len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
+		if (!icsk->icsk_ulp_ops) {
+			if (put_user(0, optlen))
+				return -EFAULT;
+			return 0;
+		}
 		if (put_user(len, optlen))
 			return -EFAULT;
 		if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len))
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 324c9bcc5456..fde983f6376b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
 		INET_ECN_dontxmit(sk);
 }
 
-static void tcp_reinit_congestion_control(struct sock *sk,
-					  const struct tcp_congestion_ops *ca)
+void tcp_reinit_congestion_control(struct sock *sk,
+				   const struct tcp_congestion_ops *ca)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -333,8 +333,12 @@ out:
 	return ret;
 }
 
-/* Change congestion control for socket */
-int tcp_set_congestion_control(struct sock *sk, const char *name)
+/* Change congestion control for socket. If load is false, then it is the
+ * responsibility of the caller to call tcp_init_congestion_control or
+ * tcp_reinit_congestion_control (if the current congestion control was
+ * already initialized.
+ */
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_congestion_ops *ca;
@@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 		return -EPERM;
 
 	rcu_read_lock();
-	ca = __tcp_ca_find_autoload(name);
+	if (!load)
+		ca = tcp_ca_find(name);
+	else
+		ca = __tcp_ca_find_autoload(name);
 	/* No change asking for existing value */
 	if (ca == icsk->icsk_ca_ops) {
 		icsk->icsk_ca_setsockopt = 1;
 		goto out;
 	}
-	if (!ca)
+	if (!ca) {
 		err = -ENOENT;
-	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
-		   ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
+	} else if (!load) {
+		icsk->icsk_ca_ops = ca;
+		if (!try_module_get(ca->owner))
+			err = -EBUSY;
+	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
+		     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
 		err = -EPERM;
-	else if (!try_module_get(ca->owner))
+	} else if (!try_module_get(ca->owner)) {
 		err = -EBUSY;
-	else
+	} else {
 		tcp_reinit_congestion_control(sk, ca);
+	}
  out:
 	rcu_read_unlock();
 	return err;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4af82b914dd4..ce9c7fef200f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -214,13 +214,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
-	atomic_set(&req->rsk_refcnt, 2);
+	refcount_set(&req->rsk_refcnt, 2);
 
 	/* Now finish processing the fastopen child socket. */
 	inet_csk(child)->icsk_af_ops->rebuild_header(child);
 	tcp_init_congestion_control(child);
 	tcp_mtup_init(child);
 	tcp_init_metrics(child);
+	tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 	tcp_init_buffer_space(child);
 
 	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2ab7e2fa9bb9..2920e0cb09f8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5571,7 +5571,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	icsk->icsk_af_ops->rebuild_header(sk);
 
 	tcp_init_metrics(sk);
-
+	tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
 	tcp_init_congestion_control(sk);
 
 	/* Prevent spurious tcp_cwnd_restart() on first data
@@ -5977,6 +5977,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		} else {
 			/* Make sure socket is routed, for correct metrics. */
 			icsk->icsk_af_ops->rebuild_header(sk);
+			tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 			tcp_init_congestion_control(sk);
 
 			tcp_mtup_init(sk);
@@ -6190,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
 	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
 
 	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
-	    (ecn_ok_dst & DST_FEATURE_ECN_CA))
+	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
+	    tcp_bpf_ca_needs_ecn((struct sock *)req))
 		inet_rsk(req)->ecn_ok = 1;
 }
 
@@ -6406,7 +6408,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	} else {
 		tcp_rsk(req)->tfo_listener = false;
 		if (!want_cookie)
-			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+			inet_csk_reqsk_queue_hash_add(sk, req,
+				tcp_timeout_init((struct sock *)req));
 		af_ops->send_synack(sk, dst, &fl, req, &foc,
 				    !want_cookie ? TCP_SYNACK_NORMAL :
 						   TCP_SYNACK_COOKIE);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d774bcd9a54b..6ec6900eb300 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2323,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
 		icsk->icsk_probes_out,
 		sock_i_ino(sk),
-		atomic_read(&sk->sk_refcnt), sk,
+		refcount_read(&sk->sk_refcnt), sk,
 		jiffies_to_clock_t(icsk->icsk_rto),
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
@@ -2349,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
-		atomic_read(&tw->tw_refcnt), tw);
+		refcount_read(&tw->tw_refcnt), tw);
 }
 
 #define TMPSZ 150
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d30ee31e94eb..0ff83c1637d8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 	int full_space = tcp_full_space(sk_listener);
 	u32 window_clamp;
 	__u8 rcv_wscale;
+	u32 rcv_wnd;
 	int mss;
 
 	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 	    (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
 		req->rsk_window_clamp = full_space;
 
+	rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req);
+	if (rcv_wnd == 0)
+		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+	else if (full_space < rcv_wnd * mss)
+		full_space = rcv_wnd * mss;
+
 	/* tcp_full_space because it is guaranteed to be the first packet */
 	tcp_select_initial_window(full_space,
 		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 		&req->rsk_window_clamp,
 		ireq->wscale_ok,
 		&rcv_wscale,
-		dst_metric(dst, RTAX_INITRWND));
+		rcv_wnd);
 	ireq->rcv_wscale = rcv_wscale;
 }
 EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index bc68da38ea86..11f69bbf9307 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 		swap(gso_skb->sk, skb->sk);
 		swap(gso_skb->destructor, skb->destructor);
 		sum_truesize += skb->truesize;
-		atomic_add(sum_truesize - gso_skb->truesize,
+		refcount_add(sum_truesize - gso_skb->truesize,
 			   &skb->sk->sk_wmem_alloc);
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a9c395b6235..4d36f0b093e6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
-	else if (tcp_ca_needs_ecn(sk))
+	else if (tcp_ca_needs_ecn(sk) ||
+		 tcp_bpf_ca_needs_ecn(sk))
 		INET_ECN_xmit(sk);
 }
 
@@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
 static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
 	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
-		       tcp_ca_needs_ecn(sk);
+		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
 
 	if (!use_ecn) {
 		const struct dst_entry *dst = __sk_dst_get(sk);
@@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 	if (use_ecn) {
 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
-		if (tcp_ca_needs_ecn(sk))
+		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
 			INET_ECN_xmit(sk);
 	}
 }
@@ -861,12 +863,11 @@ void tcp_wfree(struct sk_buff *skb)
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned long flags, nval, oval;
-	int wmem;
 
 	/* Keep one reference on sk_wmem_alloc.
 	 * Will be released by sk_free() from here or tcp_tasklet_func()
 	 */
-	wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
 
 	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
 	 * Wait until our queues (qdisc + devices) are drained.
@@ -875,7 +876,7 @@ void tcp_wfree(struct sk_buff *skb)
 	 * - chance for incoming ACK (processed by another cpu maybe)
 	 *   to migrate this flow (skb->ooo_okay will be eventually set)
 	 */
-	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+	if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
 		goto out;
 
 	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
@@ -925,7 +926,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 		if (nval != oval)
 			continue;
 
-		if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+		if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
 			break;
 		/* queue this socket to tasklet queue */
 		tsq = this_cpu_ptr(&tsq_tasklet);
@@ -1045,7 +1046,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	skb->sk = sk;
 	skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
 	skb_set_hash_from_sk(skb, sk);
-	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
 
@@ -2176,7 +2177,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 	limit <<= factor;
 
-	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
 		/* Always send the 1st or 2nd skb in write queue.
 		 * No need to wait for TX completion to call us back,
 		 * after softirq/tasklet schedule.
@@ -2192,7 +2193,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 		 * test again the condition.
 		 */
 		smp_mb__after_atomic();
-		if (atomic_read(&sk->sk_wmem_alloc) > limit)
+		if (refcount_read(&sk->sk_wmem_alloc) > limit)
 			return true;
 	}
 	return false;
@@ -2812,7 +2813,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	/* Do not sent more than we queued. 1/4 is reserved for possible
 	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 */
-	if (atomic_read(&sk->sk_wmem_alloc) >
+	if (refcount_read(&sk->sk_wmem_alloc) >
 	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
 		  sk->sk_sndbuf))
 		return -EAGAIN;
@@ -3267,6 +3268,7 @@ static void tcp_connect_init(struct sock *sk)
 	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
+	u32 rcv_wnd;
 
 	/* We'll fix this up when we get a response from the other end.
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3300,13 +3302,17 @@ static void tcp_connect_init(struct sock *sk)
 	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
 		tp->window_clamp = tcp_full_space(sk);
 
+	rcv_wnd = tcp_rwnd_init_bpf(sk);
+	if (rcv_wnd == 0)
+		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
 	tcp_select_initial_window(tcp_full_space(sk),
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
 				  &rcv_wscale,
-				  dst_metric(dst, RTAX_INITRWND));
+				  rcv_wnd);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
@@ -3327,7 +3333,7 @@ static void tcp_connect_init(struct sock *sk)
 	tp->rcv_wup = tp->rcv_nxt;
 	tp->copied_seq = tp->rcv_nxt;
 
-	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+	inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
 	inet_csk(sk)->icsk_retransmits = 0;
 	tcp_clear_retrans(tp);
 }
@@ -3440,6 +3446,7 @@ int tcp_connect(struct sock *sk)
 	struct sk_buff *buff;
 	int err;
 
+	tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
 	tcp_connect_init(sk);
 
 	if (unlikely(tp->repair)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 47c7aa0501af..25294d43e147 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -577,7 +577,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 
 	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
 			       dif, &udp_table, NULL);
-	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
 }
@@ -1163,24 +1163,7 @@ out:
 	return ret;
 }
 
-/* Copy as much information as possible into skb->dev_scratch to avoid
- * possibly multiple cache miss on dequeue();
- */
 #if BITS_PER_LONG == 64
-
-/* we can store multiple info here: truesize, len and the bit needed to
- * compute skb_csum_unnecessary will be on cold cache lines at recvmsg
- * time.
- * skb->len can be stored on 16 bits since the udp header has been already
- * validated and pulled.
- */
-struct udp_dev_scratch {
-	u32 truesize;
-	u16 len;
-	bool is_linear;
-	bool csum_unnecessary;
-};
-
 static void udp_set_dev_scratch(struct sk_buff *skb)
 {
 	struct udp_dev_scratch *scratch;
@@ -1197,22 +1180,6 @@ static int udp_skb_truesize(struct sk_buff *skb)
 {
 	return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
 }
-
-static unsigned int udp_skb_len(struct sk_buff *skb)
-{
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
-}
-
-static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
-{
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
-}
-
-static bool udp_skb_is_linear(struct sk_buff *skb)
-{
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
-}
-
 #else
 static void udp_set_dev_scratch(struct sk_buff *skb)
 {
@@ -1223,21 +1190,6 @@ static int udp_skb_truesize(struct sk_buff *skb)
 {
 	return skb->dev_scratch;
 }
-
-static unsigned int udp_skb_len(struct sk_buff *skb)
-{
-	return skb->len;
-}
-
-static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
-{
-	return skb_csum_unnecessary(skb);
-}
-
-static bool udp_skb_is_linear(struct sk_buff *skb)
-{
-	return !skb_is_nonlinear(skb);
-}
 #endif
 
 /* fully reclaim rmem/fwd memory allocated for skb */
@@ -1598,18 +1550,6 @@ busy_check:
 }
 EXPORT_SYMBOL_GPL(__skb_recv_udp);
 
-static int copy_linear_skb(struct sk_buff *skb, int len, int off,
-			   struct iov_iter *to)
-{
-	int n, copy = len - off;
-
-	n = copy_to_iter(skb->data + off, copy, to);
-	if (n == copy)
-		return 0;
-
-	return -EFAULT;
-}
-
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
@@ -2302,7 +2242,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 					     uh->source, iph->saddr, dif);
 	}
 
-	if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
 		return;
 
 	skb->sk = sk;
@@ -2751,7 +2691,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 		0, 0L, 0,
 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
 		0, sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp,
+		refcount_read(&sp->sk_refcnt), sp,
 		atomic_read(&sp->sk_drops));
 }
 
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 9a89c10a55f0..4515836d2a3a 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -55,7 +55,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 				req->id.idiag_dport,
 				req->id.idiag_if, tbl, NULL);
 #endif
-	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	rcu_read_unlock();
 	err = -ENOENT;
@@ -206,7 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 		return -EINVAL;
 	}
 
-	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 
 	rcu_read_unlock();
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a885ffcf0973..114fb64cf176 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1927,15 +1927,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 	if (dad_failed)
 		ifp->flags |= IFA_F_DADFAILED;
 
-	if (ifp->flags&IFA_F_PERMANENT) {
-		spin_lock_bh(&ifp->lock);
-		addrconf_del_dad_work(ifp);
-		ifp->flags |= IFA_F_TENTATIVE;
-		spin_unlock_bh(&ifp->lock);
-		if (dad_failed)
-			ipv6_ifa_notify(0, ifp);
-		in6_ifa_put(ifp);
-	} else if (ifp->flags&IFA_F_TEMPORARY) {
+	if (ifp->flags&IFA_F_TEMPORARY) {
 		struct inet6_ifaddr *ifpub;
 		spin_lock_bh(&ifp->lock);
 		ifpub = ifp->ifpub;
@@ -1948,6 +1940,14 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 			spin_unlock_bh(&ifp->lock);
 		}
 		ipv6_del_addr(ifp);
+	} else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) {
+		spin_lock_bh(&ifp->lock);
+		addrconf_del_dad_work(ifp);
+		ifp->flags |= IFA_F_TENTATIVE;
+		spin_unlock_bh(&ifp->lock);
+		if (dad_failed)
+			ipv6_ifa_notify(0, ifp);
+		in6_ifa_put(ifp);
 	} else {
 		ipv6_del_addr(ifp);
 	}
@@ -3384,6 +3384,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct netdev_notifier_changeupper_info *info;
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct net *net = dev_net(dev);
 	int run_pending = 0;
 	int err;
 
@@ -3399,7 +3400,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	case NETDEV_CHANGEMTU:
 		/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
 		if (dev->mtu < IPV6_MIN_MTU) {
-			addrconf_ifdown(dev, 1);
+			addrconf_ifdown(dev, dev != net->loopback_dev);
 			break;
 		}
 
@@ -3515,7 +3516,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			 * IPV6_MIN_MTU stop IPv6 on this interface.
 			 */
 			if (dev->mtu < IPV6_MIN_MTU)
-				addrconf_ifdown(dev, 1);
+				addrconf_ifdown(dev, dev != net->loopback_dev);
 		}
 		break;
 
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 8d772fea1dde..44067521e7cd 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -227,7 +227,7 @@ static int calipso_cache_check(const unsigned char *key,
 		    entry->key_len == key_len &&
 		    memcmp(entry->key, key, key_len) == 0) {
 			entry->activity += 1;
-			atomic_inc(&entry->lsm_data->refcount);
+			refcount_inc(&entry->lsm_data->refcount);
 			secattr->cache = entry->lsm_data;
 			secattr->flags |= NETLBL_SECATTR_CACHE;
 			secattr->type = NETLBL_NLTYPE_CALIPSO;
@@ -296,7 +296,7 @@ static int calipso_cache_add(const unsigned char *calipso_ptr,
 	}
 	entry->key_len = calipso_ptr_len;
 	entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len);
-	atomic_inc(&secattr->cache->refcount);
+	refcount_inc(&secattr->cache->refcount);
 	entry->lsm_data = secattr->cache;
 
 	bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e011122ebd43..a1f918713006 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -250,8 +250,14 @@ ipv4_connected:
 	 */
 
 	err = ip6_datagram_dst_update(sk, true);
-	if (err)
+	if (err) {
+		/* Reset daddr and dport so that udp_v6_early_demux()
+		 * fails to find this socket
+		 */
+		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+		inet->inet_dport = 0;
 		goto out;
+	}
 
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
@@ -1035,6 +1041,6 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
 		   0,
 		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp,
+		   refcount_read(&sp->sk_refcnt), sp,
 		   atomic_read(&sp->sk_drops));
 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 71faffdd55d9..9ed35473dcb5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 			skb->data_len += tailen;
 			skb->truesize += tailen;
 			if (sk)
-				atomic_add(tailen, &sk->sk_wmem_alloc);
+				refcount_add(tailen, &sk->sk_wmem_alloc);
 
 			goto out;
 		}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d950d43ba255..f02f131f6435 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -30,6 +30,25 @@
 #include <net/ipv6.h>
 #include <linux/icmpv6.h>
 
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+	int off = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr;
+
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+		return offsetof(struct ipv6hdr, nexthdr);
+
+	while (off < nhlen) {
+		exthdr = (void *)ipv6_hdr + off;
+		if (exthdr->nexthdr == NEXTHDR_ESP)
+			return off;
+
+		off += ipv6_optlen(exthdr);
+	}
+
+	return 0;
+}
+
 static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 					 struct sk_buff *skb)
 {
@@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 	struct xfrm_state *x;
 	__be32 seq;
 	__be32 spi;
+	int nhoff;
 	int err;
 
 	skb_pull(skb, offset);
@@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 
 	xo->flags |= XFRM_GRO;
 
+	nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+	if (!nhoff)
+		goto out;
+
+	IP6CB(skb)->nhoff = nhoff;
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d0900918a19e..b13b8f93079d 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -75,7 +75,7 @@ begin:
 			continue;
 		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
 			continue;
-		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+		if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 			goto out;
 
 		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
@@ -172,7 +172,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 
 	sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
 			    ntohs(dport), dif, &refcounted);
-	if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5baa6fab4b97..1422d6c08377 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1385,7 +1385,7 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if ((((length + fragheaderlen) > mtu) ||
+	if ((((length + (skb ? skb->len : headersize)) > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
@@ -1472,7 +1472,7 @@ alloc_new_skb:
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
-				if (atomic_read(&sk->sk_wmem_alloc) <=
+				if (refcount_read(&sk->sk_wmem_alloc) <=
 				    2 * sk->sk_sndbuf)
 					skb = sock_wmalloc(sk,
 							   alloclen + hh_len, 1,
@@ -1581,7 +1581,7 @@ alloc_new_skb:
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
-			atomic_add(copy, &sk->sk_wmem_alloc);
+			refcount_add(copy, &sk->sk_wmem_alloc);
 		}
 		offset += copy;
 		length -= copy;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 2297c9f073ba..d7b679037bae 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -75,8 +75,8 @@ static int masq_device_event(struct notifier_block *this,
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_DOWN)
-		nf_ct_iterate_cleanup(net, device_cmp,
-				      (void *)(long)dev->ifindex, 0, 0);
+		nf_ct_iterate_cleanup_net(net, device_cmp,
+					  (void *)(long)dev->ifindex, 0, 0);
 
 	return NOTIFY_DONE;
 }
@@ -99,7 +99,7 @@ static void iterate_cleanup_work(struct work_struct *work)
 	w = container_of(work, struct masq_dev_work, work);
 
 	index = w->ifindex;
-	nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+	nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
 
 	put_net(w->net);
 	kfree(w);
@@ -110,12 +110,12 @@ static void iterate_cleanup_work(struct work_struct *work)
 /* ipv6 inet notifier is an atomic notifier, i.e. we cannot
  * schedule.
  *
- * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * Unfortunately, nf_ct_iterate_cleanup_net can run for a long
  * time if there are lots of conntracks and the system
  * handles high softirq load, so it frequently calls cond_resched
  * while iterating the conntrack table.
  *
- * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue.
  *
  * As we can have 'a lot' of inet_events (depending on amount
  * of ipv6 addresses being deleted), we also need to add an upper
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2e4490076061..0488a24c2a44 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3730,7 +3730,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
-	 } else if (event == NETDEV_UNREGISTER) {
+	 } else if (event == NETDEV_UNREGISTER &&
+		    dev->reg_state != NETREG_UNREGISTERED) {
+		/* NETDEV_UNREGISTER could be fired for multiple times by
+		 * netdev_wait_allrefs(). Make sure we only call this once.
+		 */
 		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e9958b1398cb..ac912bb21747 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 	 * we try harder to allocate.
 	 */
 	kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
-		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+		kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
 		NULL;
 
 	rcu_read_lock();
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2f7e99af67db..7b75b0620730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -194,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f85cbfc183d6..2521690d62d6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -734,7 +734,7 @@ static void tcp_v6_init_req(struct request_sock *req,
 	     np->rxopt.bits.rxinfo ||
 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
 	     np->rxopt.bits.rxohlim || np->repflow)) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
 }
@@ -1809,7 +1809,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
 		   icsk->icsk_probes_out,
 		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp,
+		   refcount_read(&sp->sk_refcnt), sp,
 		   jiffies_to_clock_t(icsk->icsk_rto),
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
@@ -1842,7 +1842,7 @@ static void get_timewait6_sock(struct seq_file *seq,
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
 		   tw->tw_substate, 0, 0,
 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
-		   atomic_read(&tw->tw_refcnt), tw);
+		   refcount_read(&tw->tw_refcnt), tw);
 }
 
 static int tcp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d1d728805729..4a3e65626e8b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -325,7 +325,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
 
 	sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
 				dif, &udp_table, NULL);
-	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
 }
@@ -362,7 +362,7 @@ try_again:
 	if (!skb)
 		return err;
 
-	ulen = skb->len;
+	ulen = udp_skb_len(skb);
 	copied = len;
 	if (copied > ulen - off)
 		copied = ulen - off;
@@ -379,14 +379,18 @@ try_again:
 
 	if (copied < ulen || peeking ||
 	    (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
-		checksum_valid = !udp_lib_checksum_complete(skb);
+		checksum_valid = udp_skb_csum_unnecessary(skb) ||
+				!__udp_lib_checksum_complete(skb);
 		if (!checksum_valid)
 			goto csum_copy_err;
 	}
 
-	if (checksum_valid || skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_msg(skb, off, msg, copied);
-	else {
+	if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
+		if (udp_skb_is_linear(skb))
+			err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
+		else
+			err = skb_copy_datagram_msg(skb, off, msg, copied);
+	} else {
 		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
 		if (err == -EINVAL)
 			goto csum_copy_err;
@@ -881,7 +885,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
 	struct sock *sk;
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+		if (sk->sk_state == TCP_ESTABLISHED &&
+		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
@@ -911,7 +916,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
 	else
 		return;
 
-	if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
 		return;
 
 	skb->sk = sk;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 08a807b29298..3ef5d913e7a3 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 		return 1;
 #endif
 
-	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
 	if (xo && (xo->flags & XFRM_GRO)) {
 		skb_mac_header_rebuild(skb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ac033e413bc5..148533169b1d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -402,7 +402,7 @@ static void iucv_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(sk->sk_wmem_queued);
 	WARN_ON(sk->sk_forward_alloc);
 }
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index bf75c9231cca..c343ac60bf50 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 		   psock->sk->sk_receive_queue.qlen,
 		   atomic_read(&psock->sk->sk_rmem_alloc),
 		   psock->sk->sk_write_queue.qlen,
-		   atomic_read(&psock->sk->sk_wmem_alloc));
+		   refcount_read(&psock->sk->sk_wmem_alloc));
 
 	if (psock->done)
 		seq_puts(seq, "Done ");
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce9b8565d825..edcf1d0f82c8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 
 	atomic_dec(&net_pfkey->socks_nr);
 }
@@ -203,11 +203,11 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
 
 	sock_hold(sk);
 	if (*skb2 == NULL) {
-		if (atomic_read(&skb->users) != 1) {
+		if (refcount_read(&skb->users) != 1) {
 			*skb2 = skb_clone(skb, allocation);
 		} else {
 			*skb2 = skb;
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 		}
 	}
 	if (*skb2 != NULL) {
@@ -1150,6 +1150,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			goto out;
 	}
 
+	err = -ENOBUFS;
 	key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
@@ -1161,8 +1162,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		if (key)
 			keysize = (key->sadb_key_bits + 7) / 8;
 		x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
-		if (!x->aalg)
+		if (!x->aalg) {
+			err = -ENOMEM;
 			goto out;
+		}
 		strcpy(x->aalg->alg_name, a->name);
 		x->aalg->alg_key_len = 0;
 		if (key) {
@@ -1181,8 +1184,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 				goto out;
 			}
 			x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
-			if (!x->calg)
+			if (!x->calg) {
+				err = -ENOMEM;
 				goto out;
+			}
 			strcpy(x->calg->alg_name, a->name);
 			x->props.calgo = sa->sadb_sa_encrypt;
 		} else {
@@ -1196,8 +1201,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			if (key)
 				keysize = (key->sadb_key_bits + 7) / 8;
 			x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
-			if (!x->ealg)
+			if (!x->ealg) {
+				err = -ENOMEM;
 				goto out;
+			}
 			strcpy(x->ealg->alg_name, a->name);
 			x->ealg->alg_key_len = 0;
 			if (key) {
@@ -1242,8 +1249,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		struct xfrm_encap_tmpl *natt;
 
 		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
-		if (!x->encap)
+		if (!x->encap) {
+			err = -ENOMEM;
 			goto out;
+		}
 
 		natt = x->encap;
 		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -2742,6 +2751,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 	int err, err2;
 
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+	if (!err)
+		xfrm_garbage_collect(net);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3728,7 +3739,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
 	else
 		seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n",
 			       s,
-			       atomic_read(&s->sk_refcnt),
+			       refcount_read(&s->sk_refcnt),
 			       sk_rmem_alloc_get(s),
 			       sk_wmem_alloc_get(s),
 			       from_kuid_munged(seq_user_ns(f), sock_i_uid(s)),
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index d100aed3d06f..98a005d0d04a 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -144,9 +144,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 		   tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
 		   "");
 	seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
-		   tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
+		   tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0,
 		   atomic_read(&tunnel->ref_count));
-
 	seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n",
 		   tunnel->debug,
 		   atomic_long_read(&tunnel->stats.tx_packets),
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 9b02c13d258b..5e91b47f0d2a 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -507,7 +507,7 @@ again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_estab_match(sap, daddr, laddr, rc)) {
 			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
-			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+			if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
 				     !llc_estab_match(sap, daddr, laddr, rc))) {
@@ -566,7 +566,7 @@ again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_listener_match(sap, laddr, rc)) {
 			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
-			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+			if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
 				     !llc_listener_match(sap, laddr, rc))) {
@@ -973,9 +973,9 @@ void llc_sk_free(struct sock *sk)
 	skb_queue_purge(&sk->sk_write_queue);
 	skb_queue_purge(&llc->pdu_unack_q);
 #ifdef LLC_REFCNT_DEBUG
-	if (atomic_read(&sk->sk_refcnt) != 1) {
+	if (refcount_read(&sk->sk_refcnt) != 1) {
 		printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n",
-			sk, __func__, atomic_read(&sk->sk_refcnt));
+			sk, __func__, refcount_read(&sk->sk_refcnt));
 		printk(KERN_DEBUG "%d LLC sockets are still alive\n",
 			atomic_read(&llc_sock_nr));
 	} else {
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 63b6ab056370..d90928f50226 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -329,7 +329,7 @@ again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_dgram_match(sap, laddr, rc)) {
 			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
-			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+			if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
 				     !llc_dgram_match(sap, laddr, rc))) {
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c9b78e7b342f..913380919301 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,10 +70,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 obj-$(CONFIG_NF_DUP_NETDEV)	+= nf_dup_netdev.o
 
 # nf_tables
-nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
-nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
-nf_tables-objs += nft_lookup.o nft_dynset.o
+nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \
+		  nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \
+		  nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o
 
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
 obj-$(CONFIG_NF_TABLES_INET)	+= nf_tables_inet.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ba6a5516dc7c..e495b5e484b1 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -841,14 +841,16 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
 
 static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
 
 static int ip_set_create(struct net *net, struct sock *ctnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[])
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
@@ -989,7 +991,8 @@ ip_set_destroy_set(struct ip_set *set)
 
 static int ip_set_destroy(struct net *net, struct sock *ctnl,
 			  struct sk_buff *skb, const struct nlmsghdr *nlh,
-			  const struct nlattr * const attr[])
+			  const struct nlattr * const attr[],
+			  struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *s;
@@ -1067,7 +1070,8 @@ ip_set_flush_set(struct ip_set *set)
 
 static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
-			const struct nlattr * const attr[])
+			const struct nlattr * const attr[],
+			struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *s;
@@ -1106,7 +1110,8 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
 
 static int ip_set_rename(struct net *net, struct sock *ctnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[])
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *s;
@@ -1155,7 +1160,8 @@ out:
 
 static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *from, *to;
@@ -1428,7 +1434,8 @@ out:
 
 static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
@@ -1513,7 +1520,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 
 static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1567,7 +1575,8 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1621,7 +1630,8 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
-			const struct nlattr * const attr[])
+			const struct nlattr * const attr[],
+			struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set;
@@ -1656,7 +1666,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 static int ip_set_header(struct net *net, struct sock *ctnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[])
+			 const struct nlattr * const attr[],
+			 struct netlink_ext_ack *extack)
 {
 	struct ip_set_net *inst = ip_set_pernet(net);
 	const struct ip_set *set;
@@ -1712,7 +1723,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
 
 static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[])
+		       const struct nlattr * const attr[],
+		       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1770,7 +1782,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
 
 static int ip_set_protocol(struct net *net, struct sock *ctnl,
 			   struct sk_buff *skb, const struct nlmsghdr *nlh,
-			   const struct nlattr * const attr[])
+			   const struct nlattr * const attr[],
+			   struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 42c3e3ba1b94..3f09cdb42562 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -38,8 +38,8 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		break;
 	}
 	case IPPROTO_SCTP: {
-		sctp_sctphdr_t _sh;
-		const sctp_sctphdr_t *sh;
+		struct sctphdr _sh;
+		const struct sctphdr *sh;
 
 		sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
 		if (!sh)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index ad99c1ceea6f..e31956b58aba 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1037,9 +1037,9 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
  */
 static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
 {
-	sctp_chunkhdr_t *sch, schunk;
-	sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
-			sizeof(schunk), &schunk);
+	struct sctp_chunkhdr *sch, schunk;
+	sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr),
+				 sizeof(schunk), &schunk);
 	if (sch == NULL)
 		return 0;
 	if (sch->type == SCTP_CID_ABORT)
@@ -1070,9 +1070,9 @@ static inline bool is_new_conn(const struct sk_buff *skb,
 		return th->syn;
 	}
 	case IPPROTO_SCTP: {
-		sctp_chunkhdr_t *sch, schunk;
+		struct sctp_chunkhdr *sch, schunk;
 
-		sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
+		sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr),
 					 sizeof(schunk), &schunk);
 		if (sch == NULL)
 			return false;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 56f8e4b204ff..3ffad4adaddf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -15,16 +15,15 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		   struct ip_vs_iphdr *iph)
 {
 	struct ip_vs_service *svc;
-	sctp_chunkhdr_t _schunkh, *sch;
-	sctp_sctphdr_t *sh, _sctph;
+	struct sctp_chunkhdr _schunkh, *sch;
+	struct sctphdr *sh, _sctph;
 	__be16 _ports[2], *ports = NULL;
 
 	if (likely(!ip_vs_iph_icmp(iph))) {
 		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
 		if (sh) {
-			sch = skb_header_pointer(
-				skb, iph->len + sizeof(sctp_sctphdr_t),
-				sizeof(_schunkh), &_schunkh);
+			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
+						 sizeof(_schunkh), &_schunkh);
 			if (sch && (sch->type == SCTP_CID_INIT ||
 				    sysctl_sloppy_sctp(ipvs)))
 				ports = &sh->source;
@@ -77,7 +76,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 	return 1;
 }
 
-static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
+static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
 			  unsigned int sctphoff)
 {
 	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
@@ -88,7 +87,7 @@ static int
 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
-	sctp_sctphdr_t *sctph;
+	struct sctphdr *sctph;
 	unsigned int sctphoff = iph->len;
 	bool payload_csum = false;
 
@@ -135,7 +134,7 @@ static int
 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
-	sctp_sctphdr_t *sctph;
+	struct sctphdr *sctph;
 	unsigned int sctphoff = iph->len;
 	bool payload_csum = false;
 
@@ -378,7 +377,7 @@ static inline void
 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 		int direction, const struct sk_buff *skb)
 {
-	sctp_chunkhdr_t _sctpch, *sch;
+	struct sctp_chunkhdr _sctpch, *sch;
 	unsigned char chunk_type;
 	int event, next_state;
 	int ihl, cofs;
@@ -389,7 +388,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	ihl = ip_hdrlen(skb);
 #endif
 
-	cofs = ihl + sizeof(sctp_sctphdr_t);
+	cofs = ihl + sizeof(struct sctphdr);
 	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
 	if (sch == NULL)
 		return;
@@ -410,7 +409,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	    (sch->type == SCTP_CID_COOKIE_ACK)) {
 		int clen = ntohs(sch->length);
 
-		if (clen >= sizeof(sctp_chunkhdr_t)) {
+		if (clen >= sizeof(_sctpch)) {
 			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
 						 sizeof(_sctpch), &_sctpch);
 			if (sch && sch->type == SCTP_CID_ABORT)
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 03d2ccffa9fa..20edd589fe06 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -197,8 +197,8 @@ static void __exit nf_conntrack_amanda_fini(void)
 {
 	int i;
 
-	nf_conntrack_helper_unregister(&amanda_helper[0]);
-	nf_conntrack_helper_unregister(&amanda_helper[1]);
+	nf_conntrack_helpers_unregister(amanda_helper,
+					ARRAY_SIZE(amanda_helper));
 	for (i = 0; i < ARRAY_SIZE(search); i++)
 		textsearch_destroy(search[i].ts);
 }
@@ -218,16 +218,12 @@ static int __init nf_conntrack_amanda_init(void)
 			goto err1;
 		}
 	}
-	ret = nf_conntrack_helper_register(&amanda_helper[0]);
+	ret = nf_conntrack_helpers_register(amanda_helper,
+					    ARRAY_SIZE(amanda_helper));
 	if (ret < 0)
 		goto err1;
-	ret = nf_conntrack_helper_register(&amanda_helper[1]);
-	if (ret < 0)
-		goto err2;
 	return 0;
 
-err2:
-	nf_conntrack_helper_unregister(&amanda_helper[0]);
 err1:
 	while (--i >= 0)
 		textsearch_destroy(search[i].ts);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e847dbaa0c6b..9979f46c81dc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1586,13 +1586,12 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 
 /* Bring out ya dead! */
 static struct nf_conn *
-get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
+get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 		void *data, unsigned int *bucket)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct hlist_nulls_node *n;
-	int cpu;
 	spinlock_t *lockp;
 
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
@@ -1604,8 +1603,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 					continue;
 				ct = nf_ct_tuplehash_to_ctrack(h);
-				if (net_eq(nf_ct_net(ct), net) &&
-				    iter(ct, data))
+				if (iter(ct, data))
 					goto found;
 			}
 		}
@@ -1614,51 +1612,150 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 		cond_resched();
 	}
 
+	return NULL;
+found:
+	atomic_inc(&ct->ct_general.use);
+	spin_unlock(lockp);
+	local_bh_enable();
+	return ct;
+}
+
+static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
+				  void *data, u32 portid, int report)
+{
+	unsigned int bucket = 0, sequence;
+	struct nf_conn *ct;
+
+	might_sleep();
+
+	for (;;) {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+
+		while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+			/* Time to push up daises... */
+
+			nf_ct_delete(ct, portid, report);
+			nf_ct_put(ct);
+			cond_resched();
+		}
+
+		if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
+			break;
+		bucket = 0;
+	}
+}
+
+struct iter_data {
+	int (*iter)(struct nf_conn *i, void *data);
+	void *data;
+	struct net *net;
+};
+
+static int iter_net_only(struct nf_conn *i, void *data)
+{
+	struct iter_data *d = data;
+
+	if (!net_eq(d->net, nf_ct_net(i)))
+		return 0;
+
+	return d->iter(i, d->data);
+}
+
+static void
+__nf_ct_unconfirmed_destroy(struct net *net)
+{
+	int cpu;
+
 	for_each_possible_cpu(cpu) {
-		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+		struct nf_conntrack_tuple_hash *h;
+		struct hlist_nulls_node *n;
+		struct ct_pcpu *pcpu;
+
+		pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
 
 		spin_lock_bh(&pcpu->lock);
 		hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
+			struct nf_conn *ct;
+
 			ct = nf_ct_tuplehash_to_ctrack(h);
-			if (iter(ct, data))
-				set_bit(IPS_DYING_BIT, &ct->status);
+
+			/* we cannot call iter() on unconfirmed list, the
+			 * owning cpu can reallocate ct->ext at any time.
+			 */
+			set_bit(IPS_DYING_BIT, &ct->status);
 		}
 		spin_unlock_bh(&pcpu->lock);
 		cond_resched();
 	}
-	return NULL;
-found:
-	atomic_inc(&ct->ct_general.use);
-	spin_unlock(lockp);
-	local_bh_enable();
-	return ct;
 }
 
-void nf_ct_iterate_cleanup(struct net *net,
-			   int (*iter)(struct nf_conn *i, void *data),
-			   void *data, u32 portid, int report)
+void nf_ct_iterate_cleanup_net(struct net *net,
+			       int (*iter)(struct nf_conn *i, void *data),
+			       void *data, u32 portid, int report)
 {
-	struct nf_conn *ct;
-	unsigned int bucket = 0;
+	struct iter_data d;
 
 	might_sleep();
 
 	if (atomic_read(&net->ct.count) == 0)
 		return;
 
-	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
-		/* Time to push up daises... */
+	__nf_ct_unconfirmed_destroy(net);
 
-		nf_ct_delete(ct, portid, report);
-		nf_ct_put(ct);
-		cond_resched();
+	d.iter = iter;
+	d.data = data;
+	d.net = net;
+
+	synchronize_net();
+
+	nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
+}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
+
+/**
+ * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table
+ * @iter: callback to invoke for each conntrack
+ * @data: data to pass to @iter
+ *
+ * Like nf_ct_iterate_cleanup, but first marks conntracks on the
+ * unconfirmed list as dying (so they will not be inserted into
+ * main table).
+ *
+ * Can only be called in module exit path.
+ */
+void
+nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
+{
+	struct net *net;
+
+	rtnl_lock();
+	for_each_net(net) {
+		if (atomic_read(&net->ct.count) == 0)
+			continue;
+		__nf_ct_unconfirmed_destroy(net);
 	}
+	rtnl_unlock();
+
+	/* Need to wait for netns cleanup worker to finish, if its
+	 * running -- it might have deleted a net namespace from
+	 * the global list, so our __nf_ct_unconfirmed_destroy() might
+	 * not have affected all namespaces.
+	 */
+	net_ns_barrier();
+
+	/* a conntrack could have been unlinked from unconfirmed list
+	 * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
+	 * This makes sure its inserted into conntrack table.
+	 */
+	synchronize_net();
+
+	nf_ct_iterate_cleanup(iter, data, 0, 0);
 }
-EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
+EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
 
 static int kill_all(struct nf_conn *i, void *data)
 {
-	return 1;
+	return net_eq(nf_ct_net(i), data);
 }
 
 void nf_ct_free_hashtable(void *hash, unsigned int size)
@@ -1723,7 +1820,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 i_see_dead_people:
 	busy = 0;
 	list_for_each_entry(net, net_exit_list, exit_list) {
-		nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
+		nf_ct_iterate_cleanup(kill_all, net, 0, 0);
 		if (atomic_read(&net->ct.count) != 0)
 			busy = 1;
 	}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 3bcdc718484e..f71f0d2558fd 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1815,14 +1815,44 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
 	},
 };
 
+static int __init h323_helper_init(void)
+{
+	int ret;
+
+	ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+	if (ret < 0)
+		return ret;
+	ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931,
+					ARRAY_SIZE(nf_conntrack_helper_q931));
+	if (ret < 0)
+		goto err1;
+	ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras,
+					ARRAY_SIZE(nf_conntrack_helper_ras));
+	if (ret < 0)
+		goto err2;
+
+	return 0;
+err2:
+	nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+					ARRAY_SIZE(nf_conntrack_helper_q931));
+err1:
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+	return ret;
+}
+
+static void __exit h323_helper_exit(void)
+{
+	nf_conntrack_helpers_unregister(nf_conntrack_helper_ras,
+					ARRAY_SIZE(nf_conntrack_helper_ras));
+	nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+					ARRAY_SIZE(nf_conntrack_helper_q931));
+	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+}
+
 /****************************************************************************/
 static void __exit nf_conntrack_h323_fini(void)
 {
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[1]);
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+	h323_helper_exit();
 	kfree(h323_buffer);
 	pr_debug("nf_ct_h323: fini\n");
 }
@@ -1837,32 +1867,11 @@ static int __init nf_conntrack_h323_init(void)
 	h323_buffer = kmalloc(65536, GFP_KERNEL);
 	if (!h323_buffer)
 		return -ENOMEM;
-	ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+	ret = h323_helper_init();
 	if (ret < 0)
 		goto err1;
-	ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]);
-	if (ret < 0)
-		goto err2;
-	ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]);
-	if (ret < 0)
-		goto err3;
-	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]);
-	if (ret < 0)
-		goto err4;
-	ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
-	if (ret < 0)
-		goto err5;
 	pr_debug("nf_ct_h323: init success\n");
 	return 0;
-
-err5:
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
-err4:
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
-err3:
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
-err2:
-	nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
 err1:
 	kfree(h323_buffer);
 	return ret;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 7f6100ca63be..9129bb3b5153 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -285,16 +285,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
 
 /* appropriate ct lock protecting must be taken by caller */
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
-			 const struct nf_conntrack_helper *me)
+static int unhelp(struct nf_conn *ct, void *me)
 {
-	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_help *help = nfct_help(ct);
 
 	if (help && rcu_dereference_raw(help->helper) == me) {
 		nf_conntrack_event(IPCT_HELPER, ct);
 		RCU_INIT_POINTER(help->helper, NULL);
 	}
+
+	/* We are not intended to delete this conntrack. */
 	return 0;
 }
 
@@ -437,33 +437,10 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
-static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
-					     struct net *net)
-{
-	struct nf_conntrack_tuple_hash *h;
-	const struct hlist_nulls_node *nn;
-	int cpu;
-
-	/* Get rid of expecteds, set helpers to NULL. */
-	for_each_possible_cpu(cpu) {
-		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
-		spin_lock_bh(&pcpu->lock);
-		hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
-			unhelp(h, me);
-		spin_unlock_bh(&pcpu->lock);
-	}
-}
-
 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 {
-	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	const struct hlist_node *next;
-	const struct hlist_nulls_node *nn;
-	unsigned int last_hsize;
-	spinlock_t *lock;
-	struct net *net;
 	unsigned int i;
 
 	mutex_lock(&nf_ct_helper_mutex);
@@ -491,26 +468,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 	spin_unlock_bh(&nf_conntrack_expect_lock);
 
-	rtnl_lock();
-	for_each_net(net)
-		__nf_conntrack_helper_unregister(me, net);
-	rtnl_unlock();
-
-	local_bh_disable();
-restart:
-	last_hsize = nf_conntrack_htable_size;
-	for (i = 0; i < last_hsize; i++) {
-		lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
-		nf_conntrack_lock(lock);
-		if (last_hsize != nf_conntrack_htable_size) {
-			spin_unlock(lock);
-			goto restart;
-		}
-		hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
-			unhelp(h, me);
-		spin_unlock(lock);
-	}
-	local_bh_enable();
+	nf_ct_iterate_destroy(unhelp, me);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a8be9b72e6cd..7999e70c3bfb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -636,11 +636,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	if (events & (1 << IPCT_DESTROY)) {
 		type = IPCTNL_MSG_CT_DELETE;
 		group = NFNLGRP_CONNTRACK_DESTROY;
-	} else  if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
+	} else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
 		type = IPCTNL_MSG_CT_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else  if (events) {
+	} else if (events) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else
@@ -1122,8 +1122,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
 			return PTR_ERR(filter);
 	}
 
-	nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter,
-			      portid, report);
+	nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+				  portid, report);
 	kfree(filter);
 
 	return 0;
@@ -1132,7 +1132,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
 static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 				   struct sk_buff *skb,
 				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[])
+				   const struct nlattr * const cda[],
+				   struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -1184,7 +1185,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 				   struct sk_buff *skb,
 				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[])
+				   const struct nlattr * const cda[],
+				   struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -1345,7 +1347,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const cda[])
+				  const struct nlattr * const cda[],
+				  struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -1367,7 +1370,8 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
 					struct sk_buff *skb,
 					const struct nlmsghdr *nlh,
-					const struct nlattr * const cda[])
+					const struct nlattr * const cda[],
+					struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -1906,7 +1910,8 @@ err1:
 static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 				   struct sk_buff *skb,
 				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[])
+				   const struct nlattr * const cda[],
+				   struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
@@ -2071,7 +2076,8 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -2116,7 +2122,8 @@ nlmsg_failure:
 
 static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const cda[])
+			     const struct nlattr * const cda[],
+			     struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	int err;
@@ -2778,7 +2785,8 @@ out:
 static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	int err;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2822,7 +2830,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
 
 static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[])
+				const struct nlattr * const cda[],
+				struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
@@ -2834,7 +2843,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		if (cda[CTA_EXPECT_MASTER])
-			return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda);
+			return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda,
+						     extack);
 		else {
 			struct netlink_dump_control c = {
 				.dump = ctnetlink_exp_dump_table,
@@ -2902,7 +2912,8 @@ out:
 
 static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[])
+				const struct nlattr * const cda[],
+				struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
@@ -3190,7 +3201,8 @@ err_ct:
 
 static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[])
+				const struct nlattr * const cda[],
+				struct netlink_ext_ack *extack)
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
@@ -3296,7 +3308,8 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const cda[])
+				  const struct nlattr * const cda[],
+				  struct netlink_ext_ack *extack)
 {
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 2de6c1fe3261..1dcad229c3cc 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,8 +28,8 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
+struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
 static DEFINE_MUTEX(nf_ct_proto_mutex);
@@ -68,7 +68,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
 struct nf_conntrack_l4proto *
 __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
 {
-	if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
+	if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
 		return &nf_conntrack_l4proto_generic;
 
 	return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
@@ -212,7 +212,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 	int ret = 0;
 	struct nf_conntrack_l3proto *old;
 
-	if (proto->l3proto >= AF_MAX)
+	if (proto->l3proto >= NFPROTO_NUMPROTO)
 		return -EBUSY;
 
 	if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
 
 void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
-	BUG_ON(proto->l3proto >= AF_MAX);
+	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
 
 	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
@@ -265,6 +265,8 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	mutex_unlock(&nf_ct_proto_mutex);
 
 	synchronize_rcu();
+	/* Remove all contrack entries for this protocol */
+	nf_ct_iterate_destroy(kill_l3proto, proto);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 
@@ -280,9 +282,6 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
 	 */
 	if (proto->net_ns_put)
 		proto->net_ns_put(net);
-
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
 
@@ -342,7 +341,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 
-	if (l4proto->l3proto >= PF_MAX)
+	if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
 		return -EBUSY;
 
 	if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
@@ -421,17 +420,23 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+
 {
-	BUG_ON(l4proto->l3proto >= PF_MAX);
+	BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
 
-	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(rcu_dereference_protected(
 			nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			lockdep_is_held(&nf_ct_proto_mutex)
 			) != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   &nf_conntrack_l4proto_generic);
+}
+
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+{
+	mutex_lock(&nf_ct_proto_mutex);
+	__nf_ct_l4proto_unregister_one(l4proto);
 	mutex_unlock(&nf_ct_proto_mutex);
 
 	synchronize_rcu();
@@ -448,9 +453,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 
 	pn->users--;
 	nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
-
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
 
@@ -500,8 +502,14 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
 void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
 			      unsigned int num_proto)
 {
+	mutex_lock(&nf_ct_proto_mutex);
 	while (num_proto-- != 0)
-		nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+		__nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_net();
+	/* Remove all contrack entries for this protocol */
+	nf_ct_iterate_destroy(kill_l4proto, l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
 
@@ -548,7 +556,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
 int nf_conntrack_proto_init(void)
 {
 	unsigned int i;
-	for (i = 0; i < AF_MAX; i++)
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		rcu_assign_pointer(nf_ct_l3protos[i],
 				   &nf_conntrack_l3proto_generic);
 	return 0;
@@ -558,6 +566,6 @@ void nf_conntrack_proto_fini(void)
 {
 	unsigned int i;
 	/* free l3proto protocol tables */
-	for (i = 0; i < PF_MAX; i++)
+	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
 		kfree(nf_ct_protos[i]);
 }
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1c5b14a6cab3..31c6c8ee9d5d 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -190,7 +190,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 }
 
 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
-for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
+for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0;	\
 	(offset) < (skb)->len &&					\
 	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch)));	\
 	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
@@ -202,7 +202,7 @@ static int do_basic_checks(struct nf_conn *ct,
 			   unsigned long *map)
 {
 	u_int32_t offset, count;
-	sctp_chunkhdr_t _sch, *sch;
+	struct sctp_chunkhdr _sch, *sch;
 	int flag;
 
 	flag = 0;
@@ -395,9 +395,9 @@ static int sctp_packet(struct nf_conn *ct,
 		/* If it is an INIT or an INIT ACK note down the vtag */
 		if (sch->type == SCTP_CID_INIT ||
 		    sch->type == SCTP_CID_INIT_ACK) {
-			sctp_inithdr_t _inithdr, *ih;
+			struct sctp_inithdr _inithdr, *ih;
 
-			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+			ih = skb_header_pointer(skb, offset + sizeof(_sch),
 						sizeof(_inithdr), &_inithdr);
 			if (ih == NULL)
 				goto out_unlock;
@@ -471,23 +471,20 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 		/* Copy the vtag into the state info */
 		if (sch->type == SCTP_CID_INIT) {
-			if (sh->vtag == 0) {
-				sctp_inithdr_t _inithdr, *ih;
+			struct sctp_inithdr _inithdr, *ih;
+			/* Sec 8.5.1 (A) */
+			if (sh->vtag)
+				return false;
 
-				ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-							sizeof(_inithdr), &_inithdr);
-				if (ih == NULL)
-					return false;
+			ih = skb_header_pointer(skb, offset + sizeof(_sch),
+						sizeof(_inithdr), &_inithdr);
+			if (!ih)
+				return false;
 
-				pr_debug("Setting vtag %x for new conn\n",
-					 ih->init_tag);
+			pr_debug("Setting vtag %x for new conn\n",
+				 ih->init_tag);
 
-				ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
-								ih->init_tag;
-			} else {
-				/* Sec 8.5.1 (A) */
-				return false;
-			}
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
 		} else if (sch->type == SCTP_CID_HEARTBEAT) {
 			pr_debug("Setting vtag %x for secondary conntrack\n",
 				 sh->vtag);
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index c9d7f95768ab..f4a566e67213 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,6 +13,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
 
 static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
 {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6c72922d20ca..832c5a08d9a5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -582,12 +582,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
 		.l3proto = l3proto,
 		.l4proto = l4proto,
 	};
-	struct net *net;
 
-	rtnl_lock();
-	for_each_net(net)
-		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
-	rtnl_unlock();
+	nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
 }
 
 static void nf_nat_l3proto_clean(u8 l3proto)
@@ -595,13 +591,8 @@ static void nf_nat_l3proto_clean(u8 l3proto)
 	struct nf_nat_proto_clean clean = {
 		.l3proto = l3proto,
 	};
-	struct net *net;
 
-	rtnl_lock();
-
-	for_each_net(net)
-		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
-	rtnl_unlock();
+	nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
 }
 
 /* Protocol registration. */
@@ -822,17 +813,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 }
 #endif
 
-static void __net_exit nf_nat_net_exit(struct net *net)
-{
-	struct nf_nat_proto_clean clean = {};
-
-	nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
-}
-
-static struct pernet_operations nf_nat_net_ops = {
-	.exit = nf_nat_net_exit,
-};
-
 static struct nf_ct_helper_expectfn follow_master_nat = {
 	.name		= "nat-follow-master",
 	.expectfn	= nf_nat_follow_master,
@@ -853,10 +833,6 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
-	ret = register_pernet_subsys(&nf_nat_net_ops);
-	if (ret < 0)
-		goto cleanup_extend;
-
 	nf_ct_helper_expectfn_register(&follow_master_nat);
 
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -867,18 +843,15 @@ static int __init nf_nat_init(void)
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
 #endif
 	return 0;
-
- cleanup_extend:
-	rhltable_destroy(&nf_nat_bysource_table);
-	nf_ct_extend_unregister(&nat_extend);
-	return ret;
 }
 
 static void __exit nf_nat_cleanup(void)
 {
+	struct nf_nat_proto_clean clean = {};
 	unsigned int i;
 
-	unregister_pernet_subsys(&nf_nat_net_ops);
+	nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
+
 	nf_ct_extend_unregister(&nat_extend);
 	nf_ct_helper_expectfn_unregister(&follow_master_nat);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 804e8a0ab36e..c57ee3240b1d 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -32,7 +32,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
 {
-	sctp_sctphdr_t *hdr;
+	struct sctphdr *hdr;
 	int hdrsize = 8;
 
 	/* This could be an inner header returned in imcp packet; in such
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index da314be0c048..7843efa33c59 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -13,6 +13,7 @@
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/vmalloc.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
@@ -386,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
 	return ++table->hgenerator;
 }
 
-static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
+static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
 
 static const struct nf_chain_type *
 __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
@@ -534,7 +535,8 @@ done:
 
 static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -677,7 +679,8 @@ err:
 
 static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -830,7 +833,8 @@ out:
 
 static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -869,6 +873,9 @@ int nft_register_chain_type(const struct nf_chain_type *ctype)
 {
 	int err = 0;
 
+	if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
+		return -EINVAL;
+
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 	if (chain_type[ctype->family][ctype->type] != NULL) {
 		err = -EBUSY;
@@ -1123,7 +1130,8 @@ done:
 
 static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -1319,7 +1327,8 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
 
 static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nlattr * uninitialized_var(name);
@@ -1557,7 +1566,8 @@ err1:
 
 static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -2038,7 +2048,8 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 
 static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[])
+			     const struct nlattr * const nla[],
+			     struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -2131,7 +2142,8 @@ static struct nft_expr_info *info;
 
 static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[])
+			     const struct nlattr * const nla[],
+			     struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -2313,7 +2325,8 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 
 static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[])
+			     const struct nlattr * const nla[],
+			     struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -2377,64 +2390,77 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
  * Sets
  */
 
-static LIST_HEAD(nf_tables_set_ops);
+static LIST_HEAD(nf_tables_set_types);
 
-int nft_register_set(struct nft_set_ops *ops)
+int nft_register_set(struct nft_set_type *type)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
+	list_add_tail_rcu(&type->list, &nf_tables_set_types);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nft_register_set);
 
-void nft_unregister_set(struct nft_set_ops *ops)
+void nft_unregister_set(struct nft_set_type *type)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_del_rcu(&ops->list);
+	list_del_rcu(&type->list);
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 }
 EXPORT_SYMBOL_GPL(nft_unregister_set);
 
+#define NFT_SET_FEATURES	(NFT_SET_INTERVAL | NFT_SET_MAP | \
+				 NFT_SET_TIMEOUT | NFT_SET_OBJECT)
+
+static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
+{
+	return (flags & ops->features) == (flags & NFT_SET_FEATURES);
+}
+
 /*
  * Select a set implementation based on the data characteristics and the
  * given policy. The total memory use might not be known if no size is
  * given, in that case the amount of memory per element is used.
  */
 static const struct nft_set_ops *
-nft_select_set_ops(const struct nlattr * const nla[],
+nft_select_set_ops(const struct nft_ctx *ctx,
+		   const struct nlattr * const nla[],
 		   const struct nft_set_desc *desc,
 		   enum nft_set_policies policy)
 {
 	const struct nft_set_ops *ops, *bops;
 	struct nft_set_estimate est, best;
-	u32 features;
+	const struct nft_set_type *type;
+	u32 flags = 0;
 
 #ifdef CONFIG_MODULES
-	if (list_empty(&nf_tables_set_ops)) {
+	if (list_empty(&nf_tables_set_types)) {
 		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 		request_module("nft-set");
 		nfnl_lock(NFNL_SUBSYS_NFTABLES);
-		if (!list_empty(&nf_tables_set_ops))
+		if (!list_empty(&nf_tables_set_types))
 			return ERR_PTR(-EAGAIN);
 	}
 #endif
-	features = 0;
-	if (nla[NFTA_SET_FLAGS] != NULL) {
-		features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
-		features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT |
-			    NFT_SET_OBJECT;
-	}
+	if (nla[NFTA_SET_FLAGS] != NULL)
+		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
 
 	bops	    = NULL;
 	best.size   = ~0;
 	best.lookup = ~0;
 	best.space  = ~0;
 
-	list_for_each_entry(ops, &nf_tables_set_ops, list) {
-		if ((ops->features & features) != features)
+	list_for_each_entry(type, &nf_tables_set_types, list) {
+		if (!type->select_ops)
+			ops = type->ops;
+		else
+			ops = type->select_ops(ctx, desc, flags);
+		if (!ops)
+			continue;
+
+		if (!nft_set_ops_candidate(ops, flags))
 			continue;
-		if (!ops->estimate(desc, features, &est))
+		if (!ops->estimate(desc, flags, &est))
 			continue;
 
 		switch (policy) {
@@ -2465,10 +2491,10 @@ nft_select_set_ops(const struct nlattr * const nla[],
 			break;
 		}
 
-		if (!try_module_get(ops->owner))
+		if (!try_module_get(type->owner))
 			continue;
 		if (bops != NULL)
-			module_put(bops->owner);
+			module_put(bops->type->owner);
 
 		bops = ops;
 		best = est;
@@ -2816,7 +2842,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
 
 static int nf_tables_getset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
@@ -2892,7 +2919,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
 
 static int nf_tables_newset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -3029,7 +3057,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 
-	ops = nft_select_set_ops(nla, &desc, policy);
+	ops = nft_select_set_ops(&ctx, nla, &desc, policy);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
 
@@ -3039,12 +3067,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 
 	size = 0;
 	if (ops->privsize != NULL)
-		size = ops->privsize(nla);
+		size = ops->privsize(nla, &desc);
 
-	err = -ENOMEM;
-	set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
-	if (set == NULL)
+	set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
+	if (!set) {
+		err = -ENOMEM;
 		goto err1;
+	}
 
 	nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
 	err = nf_tables_set_alloc_name(&ctx, set, name);
@@ -3087,17 +3116,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 err3:
 	ops->destroy(set);
 err2:
-	kfree(set);
+	kvfree(set);
 err1:
-	module_put(ops->owner);
+	module_put(ops->type->owner);
 	return err;
 }
 
 static void nft_set_destroy(struct nft_set *set)
 {
 	set->ops->destroy(set);
-	module_put(set->ops->owner);
-	kfree(set);
+	module_put(set->ops->type->owner);
+	kvfree(set);
 }
 
 static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
@@ -3109,7 +3138,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
 
 static int nf_tables_delset(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -3469,7 +3499,8 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
 
 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[])
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_cur(net);
 	const struct nft_set *set;
@@ -3870,7 +3901,8 @@ err1:
 
 static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[])
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
@@ -4067,7 +4099,8 @@ err1:
 
 static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[])
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
 {
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
@@ -4277,7 +4310,8 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
 
 static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_object_type *type;
@@ -4471,7 +4505,8 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
 
 static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_cur(net);
@@ -4549,8 +4584,9 @@ static void nft_obj_destroy(struct nft_object *obj)
 }
 
 static int nf_tables_delobj(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[])
+			    struct sk_buff *skb, const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
@@ -4680,7 +4716,8 @@ err:
 
 static int nf_tables_getgen(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+			    const struct nlattr * const nla[],
+			    struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb2;
 	int err;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 80f5ecf2c3d7..92b05e188fd1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -201,7 +201,8 @@ replay:
 
 		if (nc->call_rcu) {
 			err = nc->call_rcu(net, net->nfnl, skb, nlh,
-					   (const struct nlattr **)cda);
+					   (const struct nlattr **)cda,
+					   extack);
 			rcu_read_unlock();
 		} else {
 			rcu_read_unlock();
@@ -211,7 +212,8 @@ replay:
 				err = -EAGAIN;
 			else if (nc->call)
 				err = nc->call(net, net->nfnl, skb, nlh,
-					       (const struct nlattr **)cda);
+					       (const struct nlattr **)cda,
+					       extack);
 			else
 				err = -EINVAL;
 			nfnl_unlock(subsys_id);
@@ -226,9 +228,11 @@ struct nfnl_err {
 	struct list_head	head;
 	struct nlmsghdr		*nlh;
 	int			err;
+	struct netlink_ext_ack	extack;
 };
 
-static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err,
+			const struct netlink_ext_ack *extack)
 {
 	struct nfnl_err *nfnl_err;
 
@@ -238,6 +242,7 @@ static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
 
 	nfnl_err->nlh = nlh;
 	nfnl_err->err = err;
+	nfnl_err->extack = *extack;
 	list_add_tail(&nfnl_err->head, list);
 
 	return 0;
@@ -262,7 +267,8 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
 	struct nfnl_err *nfnl_err, *next;
 
 	list_for_each_entry_safe(nfnl_err, next, err_list, head) {
-		netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
+		netlink_ack(skb, nfnl_err->nlh, nfnl_err->err,
+			    &nfnl_err->extack);
 		nfnl_err_del(nfnl_err);
 	}
 }
@@ -280,6 +286,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	const struct nfnetlink_subsystem *ss;
 	const struct nfnl_callback *nc;
+	struct netlink_ext_ack extack;
 	LIST_HEAD(err_list);
 	u32 status;
 	int err;
@@ -325,6 +332,7 @@ replay:
 	while (skb->len >= nlmsg_total_size(0)) {
 		int msglen, type;
 
+		memset(&extack, 0, sizeof(extack));
 		nlh = nlmsg_hdr(skb);
 		err = 0;
 
@@ -384,7 +392,8 @@ replay:
 
 			if (nc->call_batch) {
 				err = nc->call_batch(net, net->nfnl, skb, nlh,
-						     (const struct nlattr **)cda);
+						     (const struct nlattr **)cda,
+						     &extack);
 			}
 
 			/* The lock was released to autoload some module, we
@@ -402,7 +411,7 @@ ack:
 			 * processed, this avoids that the same error is
 			 * reported several times when replaying the batch.
 			 */
-			if (nfnl_err_add(&err_list, nlh, err) < 0) {
+			if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
 				/* We failed to enqueue an error, reset the
 				 * list of errors and send OOM to userspace
 				 * pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 9898fb4d0512..c45e6d4358ab 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -49,7 +49,8 @@ struct nfacct_filter {
 
 static int nfnl_acct_new(struct net *net, struct sock *nfnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[])
+			 const struct nlattr * const tb[],
+			 struct netlink_ext_ack *extack)
 {
 	struct nf_acct *nfacct, *matching = NULL;
 	char *acct_name;
@@ -264,7 +265,8 @@ nfacct_filter_alloc(const struct nlattr * const attr)
 
 static int nfnl_acct_get(struct net *net, struct sock *nfnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[])
+			 const struct nlattr * const tb[],
+			 struct netlink_ext_ack *extack)
 {
 	int ret = -ENOENT;
 	struct nf_acct *cur;
@@ -343,7 +345,8 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
 
 static int nfnl_acct_del(struct net *net, struct sock *nfnl,
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[])
+			 const struct nlattr * const tb[],
+			 struct netlink_ext_ack *extack)
 {
 	struct nf_acct *cur, *tmp;
 	int ret = -ENOENT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index be678a323598..41628b393673 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -398,7 +398,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
 
 static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[])
+			     const struct nlattr * const tb[],
+			     struct netlink_ext_ack *extack)
 {
 	const char *helper_name;
 	struct nf_conntrack_helper *cur, *helper = NULL;
@@ -599,7 +600,8 @@ out:
 
 static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[])
+			     const struct nlattr * const tb[],
+			     struct netlink_ext_ack *extack)
 {
 	int ret = -ENOENT;
 	struct nf_conntrack_helper *cur;
@@ -666,7 +668,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 
 static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[])
+			     const struct nlattr * const tb[],
+			     struct netlink_ext_ack *extack)
 {
 	char *helper_name = NULL;
 	struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a3e7bb54d96a..400e9ae97153 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -69,7 +69,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
 static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	__u16 l3num;
 	__u8 l4num;
@@ -239,7 +240,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	int ret = -ENOENT;
 	char *name;
@@ -287,49 +289,20 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 	return ret;
 }
 
-static void untimeout(struct nf_conntrack_tuple_hash *i,
-		      struct ctnl_timeout *timeout)
+static int untimeout(struct nf_conn *ct, void *timeout)
 {
-	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
 
 	if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
 		RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+	/* We are not intended to delete this conntrack. */
+	return 0;
 }
 
 static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
 {
-	struct nf_conntrack_tuple_hash *h;
-	const struct hlist_nulls_node *nn;
-	unsigned int last_hsize;
-	spinlock_t *lock;
-	int i, cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
-		spin_lock_bh(&pcpu->lock);
-		hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
-			untimeout(h, timeout);
-		spin_unlock_bh(&pcpu->lock);
-	}
-
-	local_bh_disable();
-restart:
-	last_hsize = nf_conntrack_htable_size;
-	for (i = 0; i < last_hsize; i++) {
-		lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
-		nf_conntrack_lock(lock);
-		if (last_hsize != nf_conntrack_htable_size) {
-			spin_unlock(lock);
-			goto restart;
-		}
-
-		hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
-			untimeout(h, timeout);
-		spin_unlock(lock);
-	}
-	local_bh_enable();
+	nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
 }
 
 /* try to delete object, fail if it is still in use. */
@@ -355,7 +328,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	struct ctnl_timeout *cur, *tmp;
 	int ret = -ENOENT;
@@ -386,7 +360,8 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	__u16 l3num;
 	__u8 l4num;
@@ -475,7 +450,8 @@ nla_put_failure:
 static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 				 struct sk_buff *skb,
 				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[])
+				 const struct nlattr * const cda[],
+				 struct netlink_ext_ack *extack)
 {
 	__u16 l3num;
 	__u8 l4num;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 94ec0d0765a8..c684ba95dbb4 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -795,7 +795,8 @@ static struct notifier_block nfulnl_rtnl_notifier = {
 
 static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfqa[])
+			      const struct nlattr * const nfqa[],
+			      struct netlink_ext_ack *extack)
 {
 	return -ENOTSUPP;
 }
@@ -818,7 +819,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
 
 static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfula[])
+			      const struct nlattr * const nfula[],
+			      struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1b17a1b445a3..16fa04086880 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1032,7 +1032,8 @@ static int nfq_id_after(unsigned int id, unsigned int max)
 static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
 				    struct sk_buff *skb,
 				    const struct nlmsghdr *nlh,
-			            const struct nlattr * const nfqa[])
+			            const struct nlattr * const nfqa[],
+				    struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nf_queue_entry *entry, *tmp;
@@ -1136,7 +1137,8 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
 static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 			      struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfqa[])
+			      const struct nlattr * const nfqa[],
+			      struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -1200,7 +1202,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 
 static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nfqa[])
+			     const struct nlattr * const nfqa[],
+			     struct netlink_ext_ack *extack)
 {
 	return -ENOTSUPP;
 }
@@ -1217,7 +1220,8 @@ static const struct nf_queue_handler nfqh = {
 
 static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nfqa[])
+			     const struct nlattr * const nfqa[],
+			     struct netlink_ext_ack *extack)
 {
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f753ec69f790..f5a7cb68694e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -530,7 +530,8 @@ nla_put_failure:
 
 static int nfnl_compat_get(struct net *net, struct sock *nfnl,
 			   struct sk_buff *skb, const struct nlmsghdr *nlh,
-			   const struct nlattr * const tb[])
+			   const struct nlattr * const tb[],
+			   struct netlink_ext_ack *extack)
 {
 	int ret = 0, target;
 	struct nfgenmsg *nfmsg;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index d3eb640bc784..c7383d8f88d0 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,9 +23,9 @@ struct nft_rt {
 	enum nft_registers	dreg:8;
 };
 
-void nft_rt_get_eval(const struct nft_expr *expr,
-		     struct nft_regs *regs,
-		     const struct nft_pktinfo *pkt)
+static void nft_rt_get_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
 {
 	const struct nft_rt *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
@@ -72,9 +72,9 @@ const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
 	[NFTA_RT_KEY]		= { .type = NLA_U32 },
 };
 
-int nft_rt_get_init(const struct nft_ctx *ctx,
-		    const struct nft_expr *expr,
-		    const struct nlattr * const tb[])
+static int nft_rt_get_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
 {
 	struct nft_rt *priv = nft_expr_priv(expr);
 	unsigned int len;
@@ -103,8 +103,8 @@ int nft_rt_get_init(const struct nft_ctx *ctx,
 					   NFT_DATA_VALUE, len);
 }
 
-int nft_rt_get_dump(struct sk_buff *skb,
-		    const struct nft_expr *expr)
+static int nft_rt_get_dump(struct sk_buff *skb,
+			   const struct nft_expr *expr)
 {
 	const struct nft_rt *priv = nft_expr_priv(expr);
 
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index b988162b5b15..734989c40579 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -236,7 +236,8 @@ static inline u32 nft_bitmap_total_size(u32 klen)
 	return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
 }
 
-static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[])
+static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[],
+					const struct nft_set_desc *desc)
 {
 	u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
 
@@ -278,7 +279,9 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
+static struct nft_set_type nft_bitmap_type;
 static struct nft_set_ops nft_bitmap_ops __read_mostly = {
+	.type		= &nft_bitmap_type,
 	.privsize	= nft_bitmap_privsize,
 	.elemsize	= offsetof(struct nft_bitmap_elem, ext),
 	.estimate	= nft_bitmap_estimate,
@@ -291,17 +294,21 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
 	.activate	= nft_bitmap_activate,
 	.lookup		= nft_bitmap_lookup,
 	.walk		= nft_bitmap_walk,
+};
+
+static struct nft_set_type nft_bitmap_type __read_mostly = {
+	.ops		= &nft_bitmap_ops,
 	.owner		= THIS_MODULE,
 };
 
 static int __init nft_bitmap_module_init(void)
 {
-	return nft_register_set(&nft_bitmap_ops);
+	return nft_register_set(&nft_bitmap_type);
 }
 
 static void __exit nft_bitmap_module_exit(void)
 {
-	nft_unregister_set(&nft_bitmap_ops);
+	nft_unregister_set(&nft_bitmap_type);
 }
 
 module_init(nft_bitmap_module_init);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3d3a6df4ce70..0fa01d772c5e 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -22,45 +22,43 @@
 #include <net/netfilter/nf_tables.h>
 
 /* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#define NFT_RHASH_ELEMENT_HINT 3
 
-struct nft_hash {
+struct nft_rhash {
 	struct rhashtable		ht;
 	struct delayed_work		gc_work;
 };
 
-struct nft_hash_elem {
+struct nft_rhash_elem {
 	struct rhash_head		node;
 	struct nft_set_ext		ext;
 };
 
-struct nft_hash_cmp_arg {
+struct nft_rhash_cmp_arg {
 	const struct nft_set		*set;
 	const u32			*key;
 	u8				genmask;
 };
 
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
 {
-	const struct nft_hash_cmp_arg *arg = data;
+	const struct nft_rhash_cmp_arg *arg = data;
 
 	return jhash(arg->key, len, seed);
 }
 
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
 {
-	const struct nft_hash_elem *he = data;
+	const struct nft_rhash_elem *he = data;
 
 	return jhash(nft_set_ext_key(&he->ext), len, seed);
 }
 
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
-			       const void *ptr)
+static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
+				const void *ptr)
 {
-	const struct nft_hash_cmp_arg *x = arg->key;
-	const struct nft_hash_elem *he = ptr;
+	const struct nft_rhash_cmp_arg *x = arg->key;
+	const struct nft_rhash_elem *he = ptr;
 
 	if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
 		return 1;
@@ -71,41 +69,49 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
 	return 0;
 }
 
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
-			    const u32 *key, const struct nft_set_ext **ext)
+static const struct rhashtable_params nft_rhash_params = {
+	.head_offset		= offsetof(struct nft_rhash_elem, node),
+	.hashfn			= nft_rhash_key,
+	.obj_hashfn		= nft_rhash_obj,
+	.obj_cmpfn		= nft_rhash_cmp,
+	.automatic_shrinking	= true,
+};
+
+static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+			     const u32 *key, const struct nft_set_ext **ext)
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	const struct nft_hash_elem *he;
-	struct nft_hash_cmp_arg arg = {
+	struct nft_rhash *priv = nft_set_priv(set);
+	const struct nft_rhash_elem *he;
+	struct nft_rhash_cmp_arg arg = {
 		.genmask = nft_genmask_cur(net),
 		.set	 = set,
 		.key	 = key,
 	};
 
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL)
 		*ext = &he->ext;
 
 	return !!he;
 }
 
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
-			    void *(*new)(struct nft_set *,
-					 const struct nft_expr *,
-					 struct nft_regs *regs),
-			    const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_set_ext **ext)
+static bool nft_rhash_update(struct nft_set *set, const u32 *key,
+			     void *(*new)(struct nft_set *,
+					  const struct nft_expr *,
+					  struct nft_regs *regs),
+			     const struct nft_expr *expr,
+			     struct nft_regs *regs,
+			     const struct nft_set_ext **ext)
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he, *prev;
-	struct nft_hash_cmp_arg arg = {
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_elem *he, *prev;
+	struct nft_rhash_cmp_arg arg = {
 		.genmask = NFT_GENMASK_ANY,
 		.set	 = set,
 		.key	 = key,
 	};
 
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL)
 		goto out;
 
@@ -114,7 +120,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
 		goto err1;
 
 	prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
-						nft_hash_params);
+						nft_rhash_params);
 	if (IS_ERR(prev))
 		goto err2;
 
@@ -134,21 +140,21 @@ err1:
 	return false;
 }
 
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
-			   const struct nft_set_elem *elem,
-			   struct nft_set_ext **ext)
+static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
+			    const struct nft_set_elem *elem,
+			    struct nft_set_ext **ext)
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he = elem->priv;
-	struct nft_hash_cmp_arg arg = {
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_elem *he = elem->priv;
+	struct nft_rhash_cmp_arg arg = {
 		.genmask = nft_genmask_next(net),
 		.set	 = set,
 		.key	 = elem->key.val.data,
 	};
-	struct nft_hash_elem *prev;
+	struct nft_rhash_elem *prev;
 
 	prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
-					       nft_hash_params);
+						nft_rhash_params);
 	if (IS_ERR(prev))
 		return PTR_ERR(prev);
 	if (prev) {
@@ -158,19 +164,19 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
 	return 0;
 }
 
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
-			      const struct nft_set_elem *elem)
+static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
+			       const struct nft_set_elem *elem)
 {
-	struct nft_hash_elem *he = elem->priv;
+	struct nft_rhash_elem *he = elem->priv;
 
 	nft_set_elem_change_active(net, set, &he->ext);
 	nft_set_elem_clear_busy(&he->ext);
 }
 
-static bool nft_hash_flush(const struct net *net,
-			   const struct nft_set *set, void *priv)
+static bool nft_rhash_flush(const struct net *net,
+			    const struct nft_set *set, void *priv)
 {
-	struct nft_hash_elem *he = priv;
+	struct nft_rhash_elem *he = priv;
 
 	if (!nft_set_elem_mark_busy(&he->ext) ||
 	    !nft_is_active(net, &he->ext)) {
@@ -180,22 +186,22 @@ static bool nft_hash_flush(const struct net *net,
 	return false;
 }
 
-static void *nft_hash_deactivate(const struct net *net,
-				 const struct nft_set *set,
-				 const struct nft_set_elem *elem)
+static void *nft_rhash_deactivate(const struct net *net,
+				  const struct nft_set *set,
+				  const struct nft_set_elem *elem)
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he;
-	struct nft_hash_cmp_arg arg = {
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_elem *he;
+	struct nft_rhash_cmp_arg arg = {
 		.genmask = nft_genmask_next(net),
 		.set	 = set,
 		.key	 = elem->key.val.data,
 	};
 
 	rcu_read_lock();
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
 	if (he != NULL &&
-	    !nft_hash_flush(net, set, he))
+	    !nft_rhash_flush(net, set, he))
 		he = NULL;
 
 	rcu_read_unlock();
@@ -203,21 +209,21 @@ static void *nft_hash_deactivate(const struct net *net,
 	return he;
 }
 
-static void nft_hash_remove(const struct net *net,
-			    const struct nft_set *set,
-			    const struct nft_set_elem *elem)
+static void nft_rhash_remove(const struct net *net,
+			     const struct nft_set *set,
+			     const struct nft_set_elem *elem)
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he = elem->priv;
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_elem *he = elem->priv;
 
-	rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+	rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 }
 
-static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
-			  struct nft_set_iter *iter)
+static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+			   struct nft_set_iter *iter)
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he;
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_elem *he;
 	struct rhashtable_iter hti;
 	struct nft_set_elem elem;
 	int err;
@@ -266,16 +272,16 @@ out:
 	rhashtable_walk_exit(&hti);
 }
 
-static void nft_hash_gc(struct work_struct *work)
+static void nft_rhash_gc(struct work_struct *work)
 {
 	struct nft_set *set;
-	struct nft_hash_elem *he;
-	struct nft_hash *priv;
+	struct nft_rhash_elem *he;
+	struct nft_rhash *priv;
 	struct nft_set_gc_batch *gcb = NULL;
 	struct rhashtable_iter hti;
 	int err;
 
-	priv = container_of(work, struct nft_hash, gc_work.work);
+	priv = container_of(work, struct nft_rhash, gc_work.work);
 	set  = nft_set_container_of(priv);
 
 	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
@@ -301,7 +307,7 @@ static void nft_hash_gc(struct work_struct *work)
 		gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
 		if (gcb == NULL)
 			goto out;
-		rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+		rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 		atomic_dec(&set->nelems);
 		nft_set_gc_batch_add(gcb, he);
 	}
@@ -315,82 +321,290 @@ schedule:
 			   nft_set_gc_interval(set));
 }
 
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
+				       const struct nft_set_desc *desc)
 {
-	return sizeof(struct nft_hash);
+	return sizeof(struct nft_rhash);
 }
 
-static const struct rhashtable_params nft_hash_params = {
-	.head_offset		= offsetof(struct nft_hash_elem, node),
-	.hashfn			= nft_hash_key,
-	.obj_hashfn		= nft_hash_obj,
-	.obj_cmpfn		= nft_hash_cmp,
-	.automatic_shrinking	= true,
-};
-
-static int nft_hash_init(const struct nft_set *set,
-			 const struct nft_set_desc *desc,
-			 const struct nlattr * const tb[])
+static int nft_rhash_init(const struct nft_set *set,
+			  const struct nft_set_desc *desc,
+			  const struct nlattr * const tb[])
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct rhashtable_params params = nft_hash_params;
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct rhashtable_params params = nft_rhash_params;
 	int err;
 
-	params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+	params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
 	params.key_len	  = set->klen;
 
 	err = rhashtable_init(&priv->ht, &params);
 	if (err < 0)
 		return err;
 
-	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
 	if (set->flags & NFT_SET_TIMEOUT)
 		queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 				   nft_set_gc_interval(set));
 	return 0;
 }
 
-static void nft_hash_elem_destroy(void *ptr, void *arg)
+static void nft_rhash_elem_destroy(void *ptr, void *arg)
 {
 	nft_set_elem_destroy(arg, ptr, true);
 }
 
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_set *set)
 {
-	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_rhash *priv = nft_set_priv(set);
 
 	cancel_delayed_work_sync(&priv->gc_work);
-	rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+	rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
 				    (void *)set);
 }
 
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
-			      struct nft_set_estimate *est)
+static u32 nft_hash_buckets(u32 size)
 {
-	unsigned int esize;
+	return roundup_pow_of_two(size * 4 / 3);
+}
 
-	esize = sizeof(struct nft_hash_elem);
-	if (desc->size) {
-		est->size = sizeof(struct nft_hash) +
-			    roundup_pow_of_two(desc->size * 4 / 3) *
-			    sizeof(struct nft_hash_elem *) +
-			    desc->size * esize;
-	} else {
-		/* Resizing happens when the load drops below 30% or goes
-		 * above 75%. The average of 52.5% load (approximated by 50%)
-		 * is used for the size estimation of the hash buckets,
-		 * meaning we calculate two buckets per element.
-		 */
-		est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
+			       struct nft_set_estimate *est)
+{
+	est->size   = ~0;
+	est->lookup = NFT_SET_CLASS_O_1;
+	est->space  = NFT_SET_CLASS_O_N;
+
+	return true;
+}
+
+struct nft_hash {
+	u32				seed;
+	u32				buckets;
+	struct hlist_head		table[];
+};
+
+struct nft_hash_elem {
+	struct hlist_node		node;
+	struct nft_set_ext		ext;
+};
+
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+			    const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_cur(net);
+	const struct nft_hash_elem *he;
+	u32 hash;
+
+	hash = jhash(key, set->klen, priv->seed);
+	hash = reciprocal_scale(hash, priv->buckets);
+	hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+		if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
+		    nft_set_elem_active(&he->ext, genmask)) {
+			*ext = &he->ext;
+			return true;
+		}
+	}
+	return false;
+}
+
+/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
+static inline u32 nft_hash_key(const u32 *key, u32 klen)
+{
+	if (klen == 4)
+		return *key;
+
+	return *(u16 *)key;
+}
+
+static bool nft_hash_lookup_fast(const struct net *net,
+				 const struct nft_set *set,
+				 const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_cur(net);
+	const struct nft_hash_elem *he;
+	u32 hash, k1, k2;
+
+	k1 = nft_hash_key(key, set->klen);
+	hash = jhash_1word(k1, priv->seed);
+	hash = reciprocal_scale(hash, priv->buckets);
+	hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+		k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen);
+		if (k1 == k2 &&
+		    nft_set_elem_active(&he->ext, genmask)) {
+			*ext = &he->ext;
+			return true;
+		}
+	}
+	return false;
+}
+
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+			   const struct nft_set_elem *elem,
+			   struct nft_set_ext **ext)
+{
+	struct nft_hash_elem *this = elem->priv, *he;
+	struct nft_hash *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_next(net);
+	u32 hash;
+
+	hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
+	hash = reciprocal_scale(hash, priv->buckets);
+	hlist_for_each_entry(he, &priv->table[hash], node) {
+		if (!memcmp(nft_set_ext_key(&this->ext),
+			    nft_set_ext_key(&he->ext), set->klen) &&
+		    nft_set_elem_active(&he->ext, genmask)) {
+			*ext = &he->ext;
+			return -EEXIST;
+		}
+	}
+	hlist_add_head_rcu(&this->node, &priv->table[hash]);
+	return 0;
+}
+
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+			      const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *he = elem->priv;
+
+	nft_set_elem_change_active(net, set, &he->ext);
+}
+
+static bool nft_hash_flush(const struct net *net,
+			   const struct nft_set *set, void *priv)
+{
+	struct nft_hash_elem *he = priv;
+
+	nft_set_elem_change_active(net, set, &he->ext);
+	return true;
+}
+
+static void *nft_hash_deactivate(const struct net *net,
+				 const struct nft_set *set,
+				 const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *this = elem->priv, *he;
+	u8 genmask = nft_genmask_next(net);
+	u32 hash;
+
+	hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
+	hash = reciprocal_scale(hash, priv->buckets);
+	hlist_for_each_entry(he, &priv->table[hash], node) {
+		if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
+			    set->klen) ||
+		    nft_set_elem_active(&he->ext, genmask)) {
+			nft_set_elem_change_active(net, set, &he->ext);
+			return he;
+		}
 	}
+	return NULL;
+}
+
+static void nft_hash_remove(const struct net *net,
+			    const struct nft_set *set,
+			    const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *he = elem->priv;
+
+	hlist_del_rcu(&he->node);
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+			  struct nft_set_iter *iter)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct nft_set_elem elem;
+	int i;
+
+	for (i = 0; i < priv->buckets; i++) {
+		hlist_for_each_entry_rcu(he, &priv->table[i], node) {
+			if (iter->count < iter->skip)
+				goto cont;
+			if (!nft_set_elem_active(&he->ext, iter->genmask))
+				goto cont;
+
+			elem.priv = he;
+
+			iter->err = iter->fn(ctx, set, iter, &elem);
+			if (iter->err < 0)
+				return;
+cont:
+			iter->count++;
+		}
+	}
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[],
+				      const struct nft_set_desc *desc)
+{
+	return sizeof(struct nft_hash) +
+	       nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
+}
 
+static int nft_hash_init(const struct nft_set *set,
+			 const struct nft_set_desc *desc,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_set_priv(set);
+
+	priv->buckets = nft_hash_buckets(desc->size);
+	get_random_bytes(&priv->seed, sizeof(priv->seed));
+
+	return 0;
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct hlist_node *next;
+	int i;
+
+	for (i = 0; i < priv->buckets; i++) {
+		hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
+			hlist_del_rcu(&he->node);
+			nft_set_elem_destroy(set, he, true);
+		}
+	}
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+			      struct nft_set_estimate *est)
+{
+	est->size   = sizeof(struct nft_hash) +
+		      nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+		      desc->size * sizeof(struct nft_hash_elem);
 	est->lookup = NFT_SET_CLASS_O_1;
 	est->space  = NFT_SET_CLASS_O_N;
 
 	return true;
 }
 
+static struct nft_set_type nft_hash_type;
+static struct nft_set_ops nft_rhash_ops __read_mostly = {
+	.type		= &nft_hash_type,
+	.privsize       = nft_rhash_privsize,
+	.elemsize	= offsetof(struct nft_rhash_elem, ext),
+	.estimate	= nft_rhash_estimate,
+	.init		= nft_rhash_init,
+	.destroy	= nft_rhash_destroy,
+	.insert		= nft_rhash_insert,
+	.activate	= nft_rhash_activate,
+	.deactivate	= nft_rhash_deactivate,
+	.flush		= nft_rhash_flush,
+	.remove		= nft_rhash_remove,
+	.lookup		= nft_rhash_lookup,
+	.update		= nft_rhash_update,
+	.walk		= nft_rhash_walk,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
+};
+
 static struct nft_set_ops nft_hash_ops __read_mostly = {
+	.type		= &nft_hash_type,
 	.privsize       = nft_hash_privsize,
 	.elemsize	= offsetof(struct nft_hash_elem, ext),
 	.estimate	= nft_hash_estimate,
@@ -402,20 +616,57 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.flush		= nft_hash_flush,
 	.remove		= nft_hash_remove,
 	.lookup		= nft_hash_lookup,
-	.update		= nft_hash_update,
 	.walk		= nft_hash_walk,
-	.features	= NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
+	.type		= &nft_hash_type,
+	.privsize       = nft_hash_privsize,
+	.elemsize	= offsetof(struct nft_hash_elem, ext),
+	.estimate	= nft_hash_estimate,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.insert		= nft_hash_insert,
+	.activate	= nft_hash_activate,
+	.deactivate	= nft_hash_deactivate,
+	.flush		= nft_hash_flush,
+	.remove		= nft_hash_remove,
+	.lookup		= nft_hash_lookup_fast,
+	.walk		= nft_hash_walk,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+static const struct nft_set_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
+		    u32 flags)
+{
+	if (desc->size) {
+		switch (desc->klen) {
+		case 2:
+		case 4:
+			return &nft_hash_fast_ops;
+		default:
+			return &nft_hash_ops;
+		}
+	}
+
+	return &nft_rhash_ops;
+}
+
+static struct nft_set_type nft_hash_type __read_mostly = {
+	.select_ops	= nft_hash_select_ops,
 	.owner		= THIS_MODULE,
 };
 
 static int __init nft_hash_module_init(void)
 {
-	return nft_register_set(&nft_hash_ops);
+	return nft_register_set(&nft_hash_type);
 }
 
 static void __exit nft_hash_module_exit(void)
 {
-	nft_unregister_set(&nft_hash_ops);
+	nft_unregister_set(&nft_hash_type);
 }
 
 module_init(nft_hash_module_init);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fbdbaa00dd5f..bce5382f1d49 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -251,7 +251,8 @@ cont:
 	read_unlock_bh(&priv->lock);
 }
 
-static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
+					const struct nft_set_desc *desc)
 {
 	return sizeof(struct nft_rbtree);
 }
@@ -283,13 +284,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
 static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 				struct nft_set_estimate *est)
 {
-	unsigned int nsize;
-
-	nsize = sizeof(struct nft_rbtree_elem);
 	if (desc->size)
-		est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+		est->size = sizeof(struct nft_rbtree) +
+			    desc->size * sizeof(struct nft_rbtree_elem);
 	else
-		est->size = nsize;
+		est->size = ~0;
 
 	est->lookup = NFT_SET_CLASS_O_LOG_N;
 	est->space  = NFT_SET_CLASS_O_N;
@@ -297,7 +296,9 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
+static struct nft_set_type nft_rbtree_type;
 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+	.type		= &nft_rbtree_type,
 	.privsize	= nft_rbtree_privsize,
 	.elemsize	= offsetof(struct nft_rbtree_elem, ext),
 	.estimate	= nft_rbtree_estimate,
@@ -311,17 +312,21 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 	.lookup		= nft_rbtree_lookup,
 	.walk		= nft_rbtree_walk,
 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+static struct nft_set_type nft_rbtree_type __read_mostly = {
+	.ops		= &nft_rbtree_ops,
 	.owner		= THIS_MODULE,
 };
 
 static int __init nft_rbtree_module_init(void)
 {
-	return nft_register_set(&nft_rbtree_ops);
+	return nft_register_set(&nft_rbtree_type);
 }
 
 static void __exit nft_rbtree_module_exit(void)
 {
-	nft_unregister_set(&nft_rbtree_ops);
+	nft_unregister_set(&nft_rbtree_type);
 }
 
 module_init(nft_rbtree_module_init);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index df7f1df00330..d767e35fff6b 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -127,7 +127,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
 						    daddr, dport,
 						    in->ifindex);
 
-			if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
 			/* NOTE: we return listeners even if bound to
 			 * 0.0.0.0, those are filtered out in
@@ -197,7 +197,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 						   daddr, ntohs(dport),
 						   in->ifindex);
 
-			if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
 			/* NOTE: we return listeners even if bound to
 			 * 0.0.0.0, those are filtered out in
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index c05fefcec238..71cfa9551d08 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -63,7 +63,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
 
 static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
 			       struct sk_buff *skb, const struct nlmsghdr *nlh,
-			       const struct nlattr * const osf_attrs[])
+			       const struct nlattr * const osf_attrs[],
+			       struct netlink_ext_ack *extack)
 {
 	struct xt_osf_user_finger *f;
 	struct xt_osf_finger *kf = NULL, *sf;
@@ -107,7 +108,8 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
 static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const osf_attrs[])
+				  const struct nlattr * const osf_attrs[],
+				  struct netlink_ext_ack *extack)
 {
 	struct xt_osf_user_finger *f;
 	struct xt_osf_finger *sf;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 4dedb96d1a06..2d2fa1d53ea6 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -42,8 +42,8 @@ match_packet(const struct sk_buff *skb,
 	     bool *hotdrop)
 {
 	u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
-	const sctp_chunkhdr_t *sch;
-	sctp_chunkhdr_t _sch;
+	const struct sctp_chunkhdr *sch;
+	struct sctp_chunkhdr _sch;
 	int chunk_match_type = info->chunk_match_type;
 	const struct xt_sctp_flag_info *flag_info = info->flag_info;
 	int flag_count = info->flag_count;
@@ -118,8 +118,8 @@ static bool
 sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
-	const sctp_sctphdr_t *sh;
-	sctp_sctphdr_t _sh;
+	const struct sctphdr *sh;
+	struct sctphdr _sh;
 
 	if (par->fragoff != 0) {
 		pr_debug("Dropping non-first fragment.. FIXME\n");
@@ -136,13 +136,13 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	return  SCCHECK(ntohs(sh->source) >= info->spts[0]
 			&& ntohs(sh->source) <= info->spts[1],
-			XT_SCTP_SRC_PORTS, info->flags, info->invflags)
-		&& SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+			XT_SCTP_SRC_PORTS, info->flags, info->invflags) &&
+		SCCHECK(ntohs(sh->dest) >= info->dpts[0]
 			&& ntohs(sh->dest) <= info->dpts[1],
-			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
-		&& SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
-					info, &par->hotdrop),
-			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+			XT_SCTP_DEST_PORTS, info->flags, info->invflags) &&
+		SCCHECK(match_packet(skb, par->thoff + sizeof(_sh),
+				     info, &par->hotdrop),
+			XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
 static int sctp_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a88745e4b7df..5acee49db90b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk)
 	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(nlk_sk(sk)->groups);
 }
 
@@ -575,7 +575,7 @@ static void netlink_remove(struct sock *sk)
 	table = &nl_table[sk->sk_protocol];
 	if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
 				    netlink_rhashtable_params)) {
-		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
 
@@ -691,7 +691,7 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
 	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
 	struct sock *sk = &nlk->sk;
 
-	if (!atomic_dec_and_test(&sk->sk_refcnt))
+	if (!refcount_dec_and_test(&sk->sk_refcnt))
 		return;
 
 	if (nlk->cb_running && nlk->cb.done) {
@@ -1848,7 +1848,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	if (dst_group) {
-		atomic_inc(&skb->users);
+		refcount_inc(&skb->users);
 		netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
 	}
 	err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
@@ -2226,7 +2226,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct netlink_sock *nlk;
 	int ret;
 
-	atomic_inc(&skb->users);
+	refcount_inc(&skb->users);
 
 	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
 	if (sk == NULL) {
@@ -2431,7 +2431,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
 		int exclude_portid = 0;
 
 		if (report) {
-			atomic_inc(&skb->users);
+			refcount_inc(&skb->users);
 			exclude_portid = portid;
 		}
 
@@ -2568,7 +2568,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 			   sk_rmem_alloc_get(s),
 			   sk_wmem_alloc_get(s),
 			   nlk->cb_running,
-			   atomic_read(&s->sk_refcnt),
+			   refcount_read(&s->sk_refcnt),
 			   atomic_read(&s->sk_drops),
 			   sock_i_ino(s)
 			);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 122bb81da918..5cf33df888c3 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -982,6 +982,8 @@ static void nfc_release(struct device *d)
 			kfree(se);
 	}
 
+	ida_simple_remove(&nfc_index_ida, dev->idx);
+
 	kfree(dev);
 }
 
@@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 				    int tx_headroom, int tx_tailroom)
 {
 	struct nfc_dev *dev;
+	int rc;
 
 	if (!ops->start_poll || !ops->stop_poll || !ops->activate_target ||
 	    !ops->deactivate_target || !ops->im_transceive)
@@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 	if (!dev)
 		return NULL;
 
+	rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
+	if (rc < 0)
+		goto err_free_dev;
+	dev->idx = rc;
+
+	dev->dev.class = &nfc_class;
+	dev_set_name(&dev->dev, "nfc%d", dev->idx);
+	device_initialize(&dev->dev);
+
 	dev->ops = ops;
 	dev->supported_protocols = supported_protocols;
 	dev->tx_headroom = tx_headroom;
@@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 	}
 
 	return dev;
+
+err_free_dev:
+	kfree(dev);
+
+	return ERR_PTR(rc);
 }
 EXPORT_SYMBOL(nfc_allocate_device);
 
@@ -1104,14 +1121,6 @@ int nfc_register_device(struct nfc_dev *dev)
 
 	pr_debug("dev_name=%s\n", dev_name(&dev->dev));
 
-	dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
-	if (dev->idx < 0)
-		return dev->idx;
-
-	dev->dev.class = &nfc_class;
-	dev_set_name(&dev->dev, "nfc%d", dev->idx);
-	device_initialize(&dev->dev);
-
 	mutex_lock(&nfc_devlist_mutex);
 	nfc_devlist_generation++;
 	rc = device_add(&dev->dev);
@@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device);
  */
 void nfc_unregister_device(struct nfc_dev *dev)
 {
-	int rc, id;
+	int rc;
 
 	pr_debug("dev_name=%s\n", dev_name(&dev->dev));
 
-	id = dev->idx;
-
 	if (dev->rfkill) {
 		rfkill_unregister(dev->rfkill);
 		rfkill_destroy(dev->rfkill);
@@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev)
 	nfc_devlist_generation++;
 	device_del(&dev->dev);
 	mutex_unlock(&nfc_devlist_mutex);
-
-	ida_simple_remove(&nfc_index_ida, id);
 }
 EXPORT_SYMBOL(nfc_unregister_device);
 
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index ebeace7a8278..de6dd37d04c7 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -240,7 +240,7 @@ int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type,
 {
 	struct digital_cmd *cmd;
 
-	cmd = kzalloc(sizeof(struct digital_cmd), GFP_KERNEL);
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 	if (!cmd)
 		return -ENOMEM;
 
@@ -287,7 +287,7 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
 {
 	struct digital_tg_mdaa_params *params;
 
-	params = kzalloc(sizeof(struct digital_tg_mdaa_params), GFP_KERNEL);
+	params = kzalloc(sizeof(*params), GFP_KERNEL);
 	if (!params)
 		return -ENOMEM;
 
@@ -706,11 +706,9 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	struct digital_data_exch *data_exch;
 	int rc;
 
-	data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL);
-	if (!data_exch) {
-		pr_err("Failed to allocate data_exch struct\n");
+	data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL);
+	if (!data_exch)
 		return -ENOMEM;
-	}
 
 	data_exch->cb = cb;
 	data_exch->cb_context = cb_context;
@@ -764,7 +762,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
 	    !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech))
 		return NULL;
 
-	ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL);
+	ddev = kzalloc(sizeof(*ddev), GFP_KERNEL);
 	if (!ddev)
 		return NULL;
 
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 74ccc2dd79d0..4f9a973988b2 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -151,7 +151,7 @@ static const u8 digital_payload_bits_map[4] = {
  *  0 <= wt <= 14 (given by the target by the TO field of ATR_RES response)
  */
 #define DIGITAL_NFC_DEP_IN_MAX_WT 14
-#define DIGITAL_NFC_DEP_TG_MAX_WT 8
+#define DIGITAL_NFC_DEP_TG_MAX_WT 14
 static const u16 digital_rwt_map[DIGITAL_NFC_DEP_IN_MAX_WT + 1] = {
 	100,  101,  101,  102,  105,
 	110,  119,  139,  177,  255,
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 3cc3448da524..2021d1d58a75 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -27,6 +27,7 @@
 
 #define DIGITAL_SDD_RES_CT  0x88
 #define DIGITAL_SDD_RES_LEN 5
+#define DIGITAL_SEL_RES_LEN 1
 
 #define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04))
 #define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60))
@@ -299,7 +300,7 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
 		}
 	}
 
-	if (!resp->len) {
+	if (resp->len != DIGITAL_SEL_RES_LEN) {
 		rc = -EIO;
 		goto exit;
 	}
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 2ffb18e73df6..fb7afcaa3004 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -77,7 +77,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 	struct sockaddr_nfc_llcp llcp_addr;
 	int len, ret = 0;
 
-	if (!addr || addr->sa_family != AF_NFC)
+	if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+	    addr->sa_family != AF_NFC)
 		return -EINVAL;
 
 	pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -151,7 +152,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
 	struct sockaddr_nfc_llcp llcp_addr;
 	int len, ret = 0;
 
-	if (!addr || addr->sa_family != AF_NFC)
+	if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+	    addr->sa_family != AF_NFC)
 		return -EINVAL;
 
 	pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -662,8 +664,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
 
 	pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
 
-	if (!addr || len < sizeof(struct sockaddr_nfc) ||
-	    addr->sa_family != AF_NFC)
+	if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC)
 		return -EINVAL;
 
 	if (addr->service_name_len == 0 && addr->dsap == 0)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index a3dac34cf790..c25e9b4179c3 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -73,11 +73,10 @@ int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
 		if (conn_info->dest_type == dest_type) {
 			if (!params)
 				return conn_info->conn_id;
-			if (conn_info) {
-				if (params->id == conn_info->dest_params->id &&
-				    params->protocol == conn_info->dest_params->protocol)
-					return conn_info->conn_id;
-			}
+
+			if (params->id == conn_info->dest_params->id &&
+			    params->protocol == conn_info->dest_params->protocol)
+				return conn_info->conn_id;
 		}
 	}
 
@@ -1173,8 +1172,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
 	return ndev;
 
 free_nfc:
-	kfree(ndev->nfc_dev);
-
+	nfc_free_device(ndev->nfc_dev);
 free_nci:
 	kfree(ndev);
 	return NULL;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 6b0850e63e09..b251fb936a27 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -907,7 +907,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
 	u32 device_idx, target_idx, protocol;
 	int rc;
 
-	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_TARGET_INDEX] ||
+	    !info->attrs[NFC_ATTR_PROTOCOLS])
 		return -EINVAL;
 
 	device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d772e9a4b4f8..45fe8c8a884d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1090,6 +1090,58 @@ static struct sw_flow_actions *get_flow_actions(struct net *net,
 	return acts;
 }
 
+/* Factor out match-init and action-copy to avoid
+ * "Wframe-larger-than=1024" warning. Because mask is only
+ * used to get actions, we new a function to save some
+ * stack space.
+ *
+ * If there are not key and action attrs, we return 0
+ * directly. In the case, the caller will also not use the
+ * match as before. If there is action attr, we try to get
+ * actions and save them to *acts. Before returning from
+ * the function, we reset the match->mask pointer. Because
+ * we should not to return match object with dangling reference
+ * to mask.
+ * */
+static int ovs_nla_init_match_and_action(struct net *net,
+					 struct sw_flow_match *match,
+					 struct sw_flow_key *key,
+					 struct nlattr **a,
+					 struct sw_flow_actions **acts,
+					 bool log)
+{
+	struct sw_flow_mask mask;
+	int error = 0;
+
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(match, key, true, &mask);
+		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
+					  a[OVS_FLOW_ATTR_MASK], log);
+		if (error)
+			goto error;
+	}
+
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		if (!a[OVS_FLOW_ATTR_KEY]) {
+			OVS_NLERR(log,
+				  "Flow key attribute not present in set flow.");
+			return -EINVAL;
+		}
+
+		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
+					 &mask, log);
+		if (IS_ERR(*acts)) {
+			error = PTR_ERR(*acts);
+			goto error;
+		}
+	}
+
+	/* On success, error is 0. */
+error:
+	match->mask = NULL;
+	return error;
+}
+
 static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1097,7 +1149,6 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sw_flow *flow;
-	struct sw_flow_mask mask;
 	struct sk_buff *reply = NULL;
 	struct datapath *dp;
 	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
@@ -1109,34 +1160,18 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	bool ufid_present;
 
 	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
-	if (a[OVS_FLOW_ATTR_KEY]) {
-		ovs_match_init(&match, &key, true, &mask);
-		error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
-					  a[OVS_FLOW_ATTR_MASK], log);
-	} else if (!ufid_present) {
+	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
 		OVS_NLERR(log,
 			  "Flow set message rejected, Key attribute missing.");
-		error = -EINVAL;
+		return -EINVAL;
 	}
+
+	error = ovs_nla_init_match_and_action(net, &match, &key, a,
+					      &acts, log);
 	if (error)
 		goto error;
 
-	/* Validate actions. */
-	if (a[OVS_FLOW_ATTR_ACTIONS]) {
-		if (!a[OVS_FLOW_ATTR_KEY]) {
-			OVS_NLERR(log,
-				  "Flow key attribute not present in set flow.");
-			error = -EINVAL;
-			goto error;
-		}
-
-		acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
-					&mask, log);
-		if (IS_ERR(acts)) {
-			error = PTR_ERR(acts);
-			goto error;
-		}
-
+	if (acts) {
 		/* Can allocate before locking if have acts. */
 		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
 						ufid_flags);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9349a495caf..e3beb28203eb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1317,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk)
 	skb_queue_purge(&sk->sk_error_queue);
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		pr_err("Attempt to release alive packet socket: %p\n", sk);
@@ -1739,7 +1739,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->flags = flags;
 		INIT_LIST_HEAD(&match->list);
 		spin_lock_init(&match->lock);
-		atomic_set(&match->sk_ref, 0);
+		refcount_set(&match->sk_ref, 0);
 		fanout_init_data(match);
 		match->prot_hook.type = po->prot_hook.type;
 		match->prot_hook.dev = po->prot_hook.dev;
@@ -1753,10 +1753,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 	    match->prot_hook.type == po->prot_hook.type &&
 	    match->prot_hook.dev == po->prot_hook.dev) {
 		err = -ENOSPC;
-		if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+		if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
 			__dev_remove_pack(&po->prot_hook);
 			po->fanout = match;
-			atomic_inc(&match->sk_ref);
+			refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
 			__fanout_link(sk, po);
 			err = 0;
 		}
@@ -1785,7 +1785,7 @@ static struct packet_fanout *fanout_release(struct sock *sk)
 	if (f) {
 		po->fanout = NULL;
 
-		if (atomic_dec_and_test(&f->sk_ref))
+		if (refcount_dec_and_test(&f->sk_ref))
 			list_del(&f->list);
 		else
 			f = NULL;
@@ -2523,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->data_len = to_write;
 	skb->len += to_write;
 	skb->truesize += to_write;
-	atomic_add(to_write, &po->sk.sk_wmem_alloc);
+	refcount_add(to_write, &po->sk.sk_wmem_alloc);
 
 	while (likely(to_write)) {
 		nr_frags = skb_shinfo(skb)->nr_frags;
@@ -4495,7 +4495,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq,
 			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
 			   s,
-			   atomic_read(&s->sk_refcnt),
+			   refcount_read(&s->sk_refcnt),
 			   s->sk_type,
 			   ntohs(po->num),
 			   po->ifindex,
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 9ee46314b7d7..94d1d405a116 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,6 +1,8 @@
 #ifndef __PACKET_INTERNAL_H__
 #define __PACKET_INTERNAL_H__
 
+#include <linux/refcount.h>
+
 struct packet_mclist {
 	struct packet_mclist	*next;
 	int			ifindex;
@@ -86,7 +88,7 @@ struct packet_fanout {
 	struct list_head	list;
 	struct sock		*arr[PACKET_FANOUT_MAX];
 	spinlock_t		lock;
-	atomic_t		sk_ref;
+	refcount_t		sk_ref;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 64634e3ec2fc..1b050dd17393 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 		return POLLHUP;
 
 	if (sk->sk_state == TCP_ESTABLISHED &&
-		atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
+		refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
 		atomic_read(&pn->tx_credits))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
@@ -614,7 +614,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
 			sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
 			from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
 			sock_i_ino(sk),
-			atomic_read(&sk->sk_refcnt), sk,
+			refcount_read(&sk->sk_refcnt), sk,
 			atomic_read(&sk->sk_drops));
 	}
 	seq_pad(seq, '\n');
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 52d11d7725c8..0d8616aa5bad 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
 		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 
 out:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 58ae0db52ea1..a2ad4482376f 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -53,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *);
  */
 static inline int rxrpc_writable(struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+	return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
 }
 
 /*
@@ -730,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
 
 	rxrpc_purge_queue(&sk->sk_receive_queue);
 
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(!sk_unhashed(sk));
 	WARN_ON(sk->sk_socket);
 
@@ -747,7 +747,7 @@ static int rxrpc_release_sock(struct sock *sk)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
 
-	_enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+	_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
 
 	/* declare the socket closed for business */
 	sock_orphan(sk);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 67b02c45271b..b8985d01876a 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -27,7 +27,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(select_skb_count(op));
-	trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 }
 
 /*
@@ -38,7 +38,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 	const void *here = __builtin_return_address(0);
 	if (skb) {
 		int n = atomic_read(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 	}
 }
 
@@ -49,7 +49,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(select_skb_count(op));
-	trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 	skb_get(skb);
 }
 
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
 		n = atomic_dec_return(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
 }
@@ -78,7 +78,7 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
 		n = atomic_dec_return(select_skb_count(op));
-		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
 }
@@ -93,7 +93,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
 	while ((skb = skb_dequeue((list))) != NULL) {
 		int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
 		trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
-				atomic_read(&skb->users), n, here);
+				refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
 }
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index eb0e9bab54c1..d6e97115500b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -340,7 +340,7 @@ META_COLLECTOR(int_sk_refcnt)
 		*err = -1;
 		return;
 	}
-	dst->value = atomic_read(&skb->sk->sk_refcnt);
+	dst->value = refcount_read(&skb->sk->sk_refcnt);
 }
 
 META_COLLECTOR(int_sk_rcvbuf)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5d95401bbc02..43b94c7b69bd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 		return sch;
 	}
 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
-	ops->destroy(sch);
+	if (ops->destroy)
+		ops->destroy(sch);
 err_out3:
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index de162592eee0..572fe2584e48 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -498,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data)
 			ATM_SKB(skb)->vcc = flow->vcc;
 			memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
 			       flow->hdr_len);
-			atomic_add(skb->truesize,
+			refcount_add(skb->truesize,
 				   &sk_atm(flow->vcc)->sk_wmem_alloc);
 			/* atm.atm_options are already set by atm_tc_enqueue */
 			flow->vcc->send(flow->vcc, skb);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 757be416f778..fa4f530ab7e1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
-	sctp_paramhdr_t *p;
+	struct sctp_paramhdr *p;
 	int i;
 
 	/* Retrieve the SCTP per socket area.  */
@@ -284,9 +284,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 			ntohs(ep->auth_chunk_list->param_hdr.length));
 
 	/* Get the AUTH random number for this association */
-	p = (sctp_paramhdr_t *)asoc->c.auth_random;
+	p = (struct sctp_paramhdr *)asoc->c.auth_random;
 	p->type = SCTP_PARAM_RANDOM;
-	p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH);
+	p->length = htons(sizeof(*p) + SCTP_AUTH_RANDOM_LENGTH);
 	get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH);
 
 	return asoc;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index f99d4855d3de..8ffa5985cd6e 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -538,7 +538,8 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
 	if (!hmacs)
 		return NULL;
 
-	n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+	n_elt = (ntohs(hmacs->param_hdr.length) -
+		 sizeof(struct sctp_paramhdr)) >> 1;
 	for (i = 0; i < n_elt; i++) {
 		id = ntohs(hmacs->hmac_ids[i]);
 
@@ -589,7 +590,8 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
 		return 0;
 
 	hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs;
-	n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+	n_elt = (ntohs(hmacs->param_hdr.length) -
+		 sizeof(struct sctp_paramhdr)) >> 1;
 
 	return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id);
 }
@@ -612,8 +614,8 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
 	if (asoc->default_hmac_id)
 		return;
 
-	n_params = (ntohs(hmacs->param_hdr.length)
-				- sizeof(sctp_paramhdr_t)) >> 1;
+	n_params = (ntohs(hmacs->param_hdr.length) -
+		    sizeof(struct sctp_paramhdr)) >> 1;
 	ep = asoc->ep;
 	for (i = 0; i < n_params; i++) {
 		id = ntohs(hmacs->hmac_ids[i]);
@@ -632,7 +634,7 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
 
 
 /* Check to see if the given chunk is supposed to be authenticated */
-static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
+static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param)
 {
 	unsigned short len;
 	int found = 0;
@@ -641,7 +643,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
 	if (!param || param->param_hdr.length == 0)
 		return 0;
 
-	len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr);
 
 	/* SCTP-AUTH, Section 3.2
 	 *    The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH
@@ -668,7 +670,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
 }
 
 /* Check if peer requested that this chunk is authenticated */
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
 {
 	if (!asoc)
 		return 0;
@@ -680,7 +682,7 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
 }
 
 /* Check if we requested that peer authenticate this chunk. */
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
 {
 	if (!asoc)
 		return 0;
@@ -775,7 +777,7 @@ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id)
 
 	/* Check if we can add this chunk to the array */
 	param_len = ntohs(p->param_hdr.length);
-	nchunks = param_len - sizeof(sctp_paramhdr_t);
+	nchunks = param_len - sizeof(struct sctp_paramhdr);
 	if (nchunks == SCTP_NUM_CHUNK_TYPES)
 		return -EINVAL;
 
@@ -812,9 +814,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 		return -EINVAL;
 
 	for (i = 0; i < hmacs->shmac_num_idents; i++)
-		ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
-	ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
-				hmacs->shmac_num_idents * sizeof(__u16));
+		ep->auth_hmacs_list->hmac_ids[i] =
+				htons(hmacs->shmac_idents[i]);
+	ep->auth_hmacs_list->param_hdr.length =
+			htons(sizeof(struct sctp_paramhdr) +
+			hmacs->shmac_num_idents * sizeof(__u16));
 	return 0;
 }
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3dcd0ecf3d99..efbc31877804 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -90,12 +90,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		 */
 		auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
 		auth_hmacs->param_hdr.length =
-					htons(sizeof(sctp_paramhdr_t) + 2);
+					htons(sizeof(struct sctp_paramhdr) + 2);
 		auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
 
 		/* Initialize the CHUNKS parameter */
 		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
-		auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t));
+		auth_chunks->param_hdr.length =
+					htons(sizeof(struct sctp_paramhdr));
 
 		/* If the Add-IP functionality is enabled, we must
 		 * authenticate, ASCONF and ASCONF-ACK chunks
@@ -104,7 +105,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
 			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
 			auth_chunks->param_hdr.length =
-					htons(sizeof(sctp_paramhdr_t) + 2);
+					htons(sizeof(struct sctp_paramhdr) + 2);
 		}
 	}
 
@@ -268,16 +269,14 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 
 	memset(ep->secret_key, 0, sizeof(ep->secret_key));
 
-	/* Give up our hold on the sock. */
 	sk = ep->base.sk;
-	if (sk != NULL) {
-		/* Remove and free the port */
-		if (sctp_sk(sk)->bind_hash)
-			sctp_put_port(sk);
+	/* Remove and free the port */
+	if (sctp_sk(sk)->bind_hash)
+		sctp_put_port(sk);
 
-		sctp_sk(sk)->ep = NULL;
-		sock_put(sk);
-	}
+	sctp_sk(sk)->ep = NULL;
+	/* Give up our hold on the sock */
+	sock_put(sk);
 
 	kfree(ep);
 	SCTP_DBG_OBJCNT_DEC(ep);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ba9ad32fc447..41eb2ec10460 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -663,19 +663,19 @@ out_unlock:
  */
 static int sctp_rcv_ootb(struct sk_buff *skb)
 {
-	sctp_chunkhdr_t *ch, _ch;
+	struct sctp_chunkhdr *ch, _ch;
 	int ch_end, offset = 0;
 
 	/* Scan through all the chunks in the packet.  */
 	do {
 		/* Make sure we have at least the header there */
-		if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
+		if (offset + sizeof(_ch) > skb->len)
 			break;
 
 		ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
 
 		/* Break out if chunk length is less then minimal. */
-		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+		if (ntohs(ch->length) < sizeof(_ch))
 			break;
 
 		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
@@ -1051,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
 	union sctp_addr *paddr = &addr;
 	struct sctphdr *sh = sctp_hdr(skb);
 	union sctp_params params;
-	sctp_init_chunk_t *init;
+	struct sctp_init_chunk *init;
 	struct sctp_af *af;
 
 	/*
@@ -1070,7 +1070,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
 	/* Find the start of the TLVs and the end of the chunk.  This is
 	 * the region we search for address parameters.
 	 */
-	init = (sctp_init_chunk_t *)skb->data;
+	init = (struct sctp_init_chunk *)skb->data;
 
 	/* Walk the parameters looking for embedded addresses. */
 	sctp_walk_params(params, init, init_hdr.params) {
@@ -1106,7 +1106,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
  */
 static struct sctp_association *__sctp_rcv_asconf_lookup(
 					struct net *net,
-					sctp_chunkhdr_t *ch,
+					struct sctp_chunkhdr *ch,
 					const union sctp_addr *laddr,
 					__be16 peer_port,
 					struct sctp_transport **transportp)
@@ -1144,7 +1144,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 				      struct sctp_transport **transportp)
 {
 	struct sctp_association *asoc = NULL;
-	sctp_chunkhdr_t *ch;
+	struct sctp_chunkhdr *ch;
 	int have_auth = 0;
 	unsigned int chunk_num = 1;
 	__u8 *ch_end;
@@ -1152,10 +1152,10 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 	/* Walk through the chunks looking for AUTH or ASCONF chunks
 	 * to help us find the association.
 	 */
-	ch = (sctp_chunkhdr_t *) skb->data;
+	ch = (struct sctp_chunkhdr *)skb->data;
 	do {
 		/* Break out if chunk length is less then minimal. */
-		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+		if (ntohs(ch->length) < sizeof(*ch))
 			break;
 
 		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
@@ -1192,7 +1192,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 		if (asoc)
 			break;
 
-		ch = (sctp_chunkhdr_t *) ch_end;
+		ch = (struct sctp_chunkhdr *)ch_end;
 		chunk_num++;
 	} while (ch_end < skb_tail_pointer(skb));
 
@@ -1210,7 +1210,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
-	sctp_chunkhdr_t *ch;
+	struct sctp_chunkhdr *ch;
 
 	/* We do not allow GSO frames here as we need to linearize and
 	 * then cannot guarantee frame boundaries. This shouldn't be an
@@ -1220,7 +1220,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
 	if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
 		return NULL;
 
-	ch = (sctp_chunkhdr_t *) skb->data;
+	ch = (struct sctp_chunkhdr *)skb->data;
 
 	/* The code below will attempt to walk the chunk and extract
 	 * parameter information.  Before we do that, we need to verify
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index f731de3e8428..48392552ee7c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -99,7 +99,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
 struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
 {
 	struct sctp_chunk *chunk;
-	sctp_chunkhdr_t *ch = NULL;
+	struct sctp_chunkhdr *ch = NULL;
 
 	chunk = queue->in_progress;
 	/* If there is no more chunks in this packet, say so */
@@ -108,7 +108,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
 	    chunk->pdiscard)
 		    return NULL;
 
-	ch = (sctp_chunkhdr_t *)chunk->chunk_end;
+	ch = (struct sctp_chunkhdr *)chunk->chunk_end;
 
 	return ch;
 }
@@ -122,7 +122,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
 struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 {
 	struct sctp_chunk *chunk;
-	sctp_chunkhdr_t *ch = NULL;
+	struct sctp_chunkhdr *ch = NULL;
 
 	/* The assumption is that we are safe to process the chunks
 	 * at this time.
@@ -151,7 +151,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 			chunk = queue->in_progress = NULL;
 		} else {
 			/* Nothing to do. Next chunk in the packet, please. */
-			ch = (sctp_chunkhdr_t *) chunk->chunk_end;
+			ch = (struct sctp_chunkhdr *)chunk->chunk_end;
 			/* Force chunk->skb->data to chunk->chunk_end.  */
 			skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
 			/* We are guaranteed to pull a SCTP header. */
@@ -195,7 +195,7 @@ next_chunk:
 
 new_skb:
 		/* This is the first chunk in the packet.  */
-		ch = (sctp_chunkhdr_t *) chunk->skb->data;
+		ch = (struct sctp_chunkhdr *)chunk->skb->data;
 		chunk->singleton = 1;
 		chunk->data_accepted = 0;
 		chunk->pdiscard = 0;
@@ -214,11 +214,10 @@ new_skb:
 
 	chunk->chunk_hdr = ch;
 	chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
-	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+	skb_pull(chunk->skb, sizeof(*ch));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
-	if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
-	    skb_tail_pointer(chunk->skb)) {
+	if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
 		/* This is not a singleton */
 		chunk->singleton = 0;
 	} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 89cee1482d35..9d8504985744 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
 	 * therefore only reserve a single byte to keep socket around until
 	 * the packet has been transmitted.
 	 */
-	atomic_inc(&sk->sk_wmem_alloc);
+	refcount_inc(&sk->sk_wmem_alloc);
 }
 
 static int sctp_packet_pack(struct sctp_packet *packet,
@@ -723,8 +723,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
 	/* Check whether this chunk and all the rest of pending data will fit
 	 * or delay in hopes of bundling a full sized packet.
 	 */
-	if (chunk->skb->len + q->out_qlen >
-		transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
+	if (chunk->skb->len + q->out_qlen > transport->pathmtu -
+		packet->overhead - sizeof(struct sctp_data_chunk) - 4)
 		/* Enough data queued to fill a packet */
 		return SCTP_XMIT_OK;
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 20299df163b9..e8762702a313 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1102,7 +1102,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 				 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
 				 "illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
 				 chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
-				 atomic_read(&chunk->skb->users) : -1);
+				 refcount_read(&chunk->skb->users) : -1);
 
 			/* Add the chunk to the packet.  */
 			status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 8e34db56bc1d..26b4be6b4172 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -363,7 +363,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 		assoc->stream.outcnt, assoc->max_retrans,
 		assoc->init_retries, assoc->shutdown_retries,
 		assoc->rtx_data_chunks,
-		atomic_read(&sk->sk_wmem_alloc),
+		refcount_read(&sk->sk_wmem_alloc),
 		sk->sk_wmem_queued,
 		sk->sk_sndbuf,
 		sk->sk_rcvbuf);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4b1967997c16..3af4dd024ec0 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -217,7 +217,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 {
 	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_endpoint *ep = asoc->ep;
-	sctp_inithdr_t init;
+	struct sctp_inithdr init;
 	union sctp_params addrs;
 	size_t chunksize;
 	struct sctp_chunk *retval = NULL;
@@ -229,7 +229,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
 	__u8 extensions[3];
-	sctp_paramhdr_t *auth_chunks = NULL,
+	struct sctp_paramhdr *auth_chunks = NULL,
 			*auth_hmacs = NULL;
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -286,14 +286,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 		chunksize += sizeof(asoc->c.auth_random);
 
 		/* Add HMACS parameter length if any were defined */
-		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+		auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
 			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		/* Add CHUNKS parameter length */
-		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+		auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
 			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
@@ -385,7 +385,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 				 const struct sctp_chunk *chunk,
 				 gfp_t gfp, int unkparam_len)
 {
-	sctp_inithdr_t initack;
+	struct sctp_inithdr initack;
 	struct sctp_chunk *retval;
 	union sctp_params addrs;
 	struct sctp_sock *sp;
@@ -397,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
 	__u8 extensions[3];
-	sctp_paramhdr_t *auth_chunks = NULL,
+	struct sctp_paramhdr *auth_chunks = NULL,
 			*auth_hmacs = NULL,
 			*auth_random = NULL;
 
@@ -448,16 +448,16 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 		chunksize += sizeof(aiparam);
 
 	if (asoc->peer.auth_capable) {
-		auth_random = (sctp_paramhdr_t *)asoc->c.auth_random;
+		auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
 		chunksize += ntohs(auth_random->length);
 
-		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+		auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
 			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
-		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+		auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
 			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
@@ -1085,18 +1085,18 @@ struct sctp_chunk *sctp_make_abort_violation(
 	struct sctp_chunk  *retval;
 	struct sctp_paramhdr phdr;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen
-					+ sizeof(sctp_paramhdr_t));
+	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
+					      sizeof(phdr));
 	if (!retval)
 		goto end;
 
-	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen
-					+ sizeof(sctp_paramhdr_t));
+	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen +
+							    sizeof(phdr));
 
 	phdr.type = htons(chunk->chunk_hdr->type);
 	phdr.length = chunk->chunk_hdr->length;
 	sctp_addto_chunk(retval, paylen, payload);
-	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr);
+	sctp_addto_param(retval, sizeof(phdr), &phdr);
 
 end:
 	return retval;
@@ -1110,16 +1110,16 @@ struct sctp_chunk *sctp_make_violation_paramlen(
 	struct sctp_chunk *retval;
 	static const char error[] = "The following parameter had invalid length:";
 	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
-				sizeof(sctp_paramhdr_t);
+			     sizeof(*param);
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
 		goto nodata;
 
 	sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
-			sizeof(error) + sizeof(sctp_paramhdr_t));
+			sizeof(error) + sizeof(*param));
 	sctp_addto_chunk(retval, sizeof(error), error);
-	sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+	sctp_addto_param(retval, sizeof(*param), param);
 
 nodata:
 	return retval;
@@ -1379,20 +1379,20 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 					    gfp_t gfp)
 {
 	struct sctp_chunk *retval;
-	sctp_chunkhdr_t *chunk_hdr;
+	struct sctp_chunkhdr *chunk_hdr;
 	struct sk_buff *skb;
 	struct sock *sk;
 
 	/* No need to allocate LL here, as this is only a chunk. */
-	skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+	skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp);
 	if (!skb)
 		goto nodata;
 
 	/* Make room for the chunk header.  */
-	chunk_hdr = skb_put(skb, sizeof(sctp_chunkhdr_t));
+	chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr));
 	chunk_hdr->type	  = type;
 	chunk_hdr->flags  = flags;
-	chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
+	chunk_hdr->length = htons(sizeof(*chunk_hdr));
 
 	sk = asoc ? asoc->base.sk : NULL;
 	retval = sctp_chunkify(skb, asoc, sk, gfp);
@@ -1402,7 +1402,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 	}
 
 	retval->chunk_hdr = chunk_hdr;
-	retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr);
+	retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr);
 
 	/* Determine if the chunk needs to be authenticated */
 	if (sctp_auth_send_cid(type, asoc))
@@ -1614,7 +1614,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
 	 */
-	headersize = sizeof(sctp_paramhdr_t) +
+	headersize = sizeof(struct sctp_paramhdr) +
 		     (sizeof(struct sctp_signed_cookie) -
 		      sizeof(struct sctp_cookie));
 	bodysize = sizeof(struct sctp_cookie)
@@ -1710,7 +1710,7 @@ struct sctp_association *sctp_unpack_cookie(
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
 	 */
-	headersize = sizeof(sctp_chunkhdr_t) +
+	headersize = sizeof(struct sctp_chunkhdr) +
 		     (sizeof(struct sctp_signed_cookie) -
 		      sizeof(struct sctp_cookie));
 	bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
@@ -1882,7 +1882,7 @@ struct __sctp_missing {
  * Report a missing mandatory parameter.
  */
 static int sctp_process_missing_param(const struct sctp_association *asoc,
-				      sctp_param_t paramtype,
+				      enum sctp_param paramtype,
 				      struct sctp_chunk *chunk,
 				      struct sctp_chunk **errp)
 {
@@ -1975,7 +1975,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 
 static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 {
-	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 	int have_auth = 0;
 	int have_asconf = 0;
 	int i;
@@ -2010,7 +2010,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 				    union sctp_params param)
 {
 	struct net *net = sock_net(asoc->base.sk);
-	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2123,7 +2123,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					union sctp_params param,
-					sctp_cid_t cid,
+					enum sctp_cid cid,
 					struct sctp_chunk *chunk,
 					struct sctp_chunk **err_chunk)
 {
@@ -2180,7 +2180,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		 * cause 'Protocol Violation'.
 		 */
 		if (SCTP_AUTH_RANDOM_LENGTH !=
-			ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) {
+			ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
 			sctp_process_inv_paramlength(asoc, param.p,
 							chunk, err_chunk);
 			retval = SCTP_IERROR_ABORT;
@@ -2208,7 +2208,8 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 			goto fallthrough;
 
 		hmacs = (struct sctp_hmac_algo_param *)param.p;
-		n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1;
+		n_elt = (ntohs(param.p->length) -
+			 sizeof(struct sctp_paramhdr)) >> 1;
 
 		/* SCTP-AUTH: Section 6.1
 		 * The HMAC algorithm based on SHA-1 MUST be supported and
@@ -2240,9 +2241,9 @@ fallthrough:
 
 /* Verify the INIT packet before we process it.  */
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
-		     const struct sctp_association *asoc, sctp_cid_t cid,
-		     sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
-		     struct sctp_chunk **errp)
+		     const struct sctp_association *asoc, enum sctp_cid cid,
+		     struct sctp_init_chunk *peer_init,
+		     struct sctp_chunk *chunk, struct sctp_chunk **errp)
 {
 	union sctp_params param;
 	bool has_cookie = false;
@@ -2306,7 +2307,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
  */
 int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer_addr,
-		      sctp_init_chunk_t *peer_init, gfp_t gfp)
+		      struct sctp_init_chunk *peer_init, gfp_t gfp)
 {
 	struct net *net = sock_net(asoc->base.sk);
 	union sctp_params param;
@@ -2565,7 +2566,7 @@ do_addr_param:
 			asoc->peer.ipv4_address = 1;
 
 		/* Cycle through address types; avoid divide by 0. */
-		sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+		sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 		if (sat)
 			sat /= sizeof(__u16);
 
@@ -2592,7 +2593,7 @@ do_addr_param:
 
 	case SCTP_PARAM_STATE_COOKIE:
 		asoc->peer.cookie_len =
-			ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+			ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 		asoc->peer.cookie = param.cookie->body;
 		break;
 
@@ -3176,7 +3177,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 				return false;
 			length = ntohs(param.addip->param_hdr.length);
 			if (length < sizeof(sctp_addip_param_t) +
-				     sizeof(sctp_paramhdr_t))
+				     sizeof(**errp))
 				return false;
 			break;
 		case SCTP_PARAM_SUCCESS_REPORT:
@@ -3218,7 +3219,8 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 	int	chunk_len;
 	__u32	serial;
 
-	chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+	chunk_len = ntohs(asconf->chunk_hdr->length) -
+		    sizeof(struct sctp_chunkhdr);
 	hdr = (sctp_addiphdr_t *)asconf->skb->data;
 	serial = ntohl(hdr->serial);
 
@@ -3364,7 +3366,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 		err_code = SCTP_ERROR_REQ_REFUSED;
 
 	asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) -
-			     sizeof(sctp_chunkhdr_t);
+			 sizeof(struct sctp_chunkhdr);
 
 	/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
 	 * the first asconf_ack parameter.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index dfe1fcb520ba..d6e5e9e0fd6d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -647,7 +647,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
 				 struct sctp_association *asoc,
 				 struct sctp_chunk *chunk,
-				 sctp_init_chunk_t *peer_init,
+				 struct sctp_init_chunk *peer_init,
 				 gfp_t gfp)
 {
 	int error;
@@ -955,9 +955,10 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
 		switch (err_hdr->cause) {
 		case SCTP_ERROR_UNKNOWN_CHUNK:
 		{
-			sctp_chunkhdr_t *unk_chunk_hdr;
+			struct sctp_chunkhdr *unk_chunk_hdr;
 
-			unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable;
+			unk_chunk_hdr = (struct sctp_chunkhdr *)
+							err_hdr->variable;
 			switch (unk_chunk_hdr->type) {
 			/* ADDIP 4.1 A9) If the peer responds to an ASCONF with
 			 * an ERROR chunk reporting that it did not recognized
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8feff96a5bef..b2a74c3823ee 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -235,7 +235,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 		return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -345,7 +345,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	 * error, but since we don't have an association, we'll
 	 * just discard the packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the INIT is coming toward a closing socket, we'll send back
@@ -360,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes if there is any.
@@ -368,9 +368,9 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
-					sizeof(sctp_chunkhdr_t),
+					sizeof(struct sctp_chunkhdr),
 					ntohs(err_chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t));
+					sizeof(struct sctp_chunkhdr));
 
 			sctp_chunk_free(err_chunk);
 
@@ -389,10 +389,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	}
 
 	/* Grab the INIT header.  */
-	chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data;
+	chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
 
 	/* Tag the variable length parameters.  */
-	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
 	new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC);
 	if (!new_asoc)
@@ -405,7 +405,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 
 	/* The call, sctp_process_init(), can fail on memory allocation.  */
 	if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
-			       (sctp_init_chunk_t *)chunk->chunk_hdr,
+			       (struct sctp_init_chunk *)chunk->chunk_hdr,
 			       GFP_ATOMIC))
 		goto nomem_init;
 
@@ -417,7 +417,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 	len = 0;
 	if (err_chunk)
 		len = ntohs(err_chunk->chunk_hdr->length) -
-			sizeof(sctp_chunkhdr_t);
+		      sizeof(struct sctp_chunkhdr);
 
 	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
 	if (!repl)
@@ -437,7 +437,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 		 */
 		unk_param = (sctp_unrecognized_param_t *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
-			    sizeof(sctp_chunkhdr_t));
+			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
 		 * parameter type.
 		 */
@@ -503,7 +503,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 				       sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_init_chunk_t *initchunk;
+	struct sctp_init_chunk *initchunk;
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
 
@@ -522,12 +522,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
-	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+	chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 
 		sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
@@ -540,9 +540,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
-					sizeof(sctp_chunkhdr_t),
+					sizeof(struct sctp_chunkhdr),
 					ntohs(err_chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t));
+					sizeof(struct sctp_chunkhdr));
 
 			sctp_chunk_free(err_chunk);
 
@@ -576,9 +576,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 	/* Tag the variable length parameters.  Note that we never
 	 * convert the parameters in an INIT chunk.
 	 */
-	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
-	initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr;
+	initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr;
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
 			SCTP_PEER_INIT(initchunk));
@@ -653,7 +653,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_association *new_asoc;
-	sctp_init_chunk_t *peer_init;
+	struct sctp_init_chunk *peer_init;
 	struct sctp_chunk *repl;
 	struct sctp_ulpevent *ev, *ai_ev = NULL;
 	int error = 0;
@@ -673,7 +673,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	 * chunk header.  More detailed verification is done
 	 * in sctp_unpack_cookie().
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the endpoint is not listening or if the number of associations
@@ -691,7 +691,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	chunk->subh.cookie_hdr =
 		(struct sctp_signed_cookie *)chunk->skb->data;
 	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-					 sizeof(sctp_chunkhdr_t)))
+					 sizeof(struct sctp_chunkhdr)))
 		goto nomem;
 
 	/* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
@@ -770,9 +770,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 		auth.skb = chunk->auth_chunk;
 		auth.asoc = chunk->asoc;
 		auth.sctp_hdr = chunk->sctp_hdr;
-		auth.chunk_hdr = skb_push(chunk->auth_chunk,
-					  sizeof(sctp_chunkhdr_t));
-		skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
+		auth.chunk_hdr = (struct sctp_chunkhdr *)
+					skb_push(chunk->auth_chunk,
+						 sizeof(struct sctp_chunkhdr));
+		skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
 		auth.transport = chunk->transport;
 
 		ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
@@ -886,7 +887,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
 	/* Verify that the chunk length for the COOKIE-ACK is OK.
 	 * If we don't do this, any bundled chunks may be junked.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -1080,7 +1081,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
-	sctp_paramhdr_t *param_hdr;
+	struct sctp_paramhdr *param_hdr;
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 	size_t paylen = 0;
@@ -1097,9 +1098,9 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 	 * respond with a HEARTBEAT ACK that contains the Heartbeat
 	 * Information field copied from the received HEARTBEAT chunk.
 	 */
-	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
-	param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr;
-	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+	param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
+	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
 
 	if (ntohs(param_hdr->length) > paylen)
 		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
@@ -1164,7 +1165,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
 					    sizeof(sctp_sender_hb_info_t)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
@@ -1449,19 +1450,19 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	 * In this case, we generate a protocol violation since we have
 	 * an association established.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
-	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+	chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
 
 	/* Tag the variable length parameters.  */
-	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+	chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
 	if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes if there is any.
@@ -1469,9 +1470,9 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		if (err_chunk) {
 			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
-					sizeof(sctp_chunkhdr_t),
+					sizeof(struct sctp_chunkhdr),
 					ntohs(err_chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t));
+					sizeof(struct sctp_chunkhdr));
 
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
@@ -1508,7 +1509,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	 * place (local tie-tag and per tie-tag) within the state cookie.
 	 */
 	if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
-			       (sctp_init_chunk_t *)chunk->chunk_hdr,
+			       (struct sctp_init_chunk *)chunk->chunk_hdr,
 			       GFP_ATOMIC))
 		goto nomem;
 
@@ -1535,7 +1536,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	len = 0;
 	if (err_chunk) {
 		len = ntohs(err_chunk->chunk_hdr->length) -
-			sizeof(sctp_chunkhdr_t);
+		      sizeof(struct sctp_chunkhdr);
 	}
 
 	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
@@ -1556,7 +1557,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		 */
 		unk_param = (sctp_unrecognized_param_t *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
-			    sizeof(sctp_chunkhdr_t));
+			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
 		 * parameter type.
 		 */
@@ -1729,7 +1730,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 					sctp_cmd_seq_t *commands,
 					struct sctp_association *new_asoc)
 {
-	sctp_init_chunk_t *peer_init;
+	struct sctp_init_chunk *peer_init;
 	struct sctp_ulpevent *ev;
 	struct sctp_chunk *repl;
 	struct sctp_chunk *err;
@@ -1844,7 +1845,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
 					sctp_cmd_seq_t *commands,
 					struct sctp_association *new_asoc)
 {
-	sctp_init_chunk_t *peer_init;
+	struct sctp_init_chunk *peer_init;
 	struct sctp_chunk *repl;
 
 	/* new_asoc is a brand-new association, so these are not yet
@@ -2044,7 +2045,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 	 * enough for the chunk header.  Cookie length verification is
 	 * done later.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -2053,7 +2054,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 	 */
 	chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
 	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-					sizeof(sctp_chunkhdr_t)))
+					sizeof(struct sctp_chunkhdr)))
 		goto nomem;
 
 	/* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
@@ -2806,7 +2807,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
 	struct sctp_chunk *reply;
 
 	/* Make sure that the chunk has a valid length */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -2989,7 +2990,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3009,7 +3010,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 		return SCTP_DISPOSITION_ABORT;
 	case SCTP_IERROR_PROTO_VIOLATION:
 		return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
-			(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+					       (u8 *)chunk->subh.data_hdr,
+					       sizeof(struct sctp_datahdr));
 	default:
 		BUG();
 	}
@@ -3107,7 +3109,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3123,7 +3125,8 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 		return SCTP_DISPOSITION_ABORT;
 	case SCTP_IERROR_PROTO_VIOLATION:
 		return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
-			(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+					       (u8 *)chunk->subh.data_hdr,
+					       sizeof(struct sctp_datahdr));
 	default:
 		BUG();
 	}
@@ -3358,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* 10.2 H) SHUTDOWN COMPLETE notification
@@ -3435,7 +3438,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
-	sctp_chunkhdr_t *ch;
+	struct sctp_chunkhdr *ch;
 	sctp_errhdr_t *err;
 	__u8 *ch_end;
 	int ootb_shut_ack = 0;
@@ -3443,10 +3446,10 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 
 	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
-	ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
+	ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
 	do {
 		/* Report violation if the chunk is less then minimal */
-		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+		if (ntohs(ch->length) < sizeof(*ch))
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3487,7 +3490,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 			}
 		}
 
-		ch = (sctp_chunkhdr_t *) ch_end;
+		ch = (struct sctp_chunkhdr *)ch_end;
 	} while (ch_end < skb_tail_pointer(skb));
 
 	if (ootb_shut_ack)
@@ -3560,7 +3563,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 	/* If the chunk length is invalid, we don't want to process
 	 * the reset of the packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* We need to discard the rest of the packet to prevent
@@ -3591,7 +3594,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 	struct sctp_chunk *chunk = arg;
 
 	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -4256,7 +4259,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 {
 	struct sctp_chunk *unk_chunk = arg;
 	struct sctp_chunk *err_chunk;
-	sctp_chunkhdr_t *hdr;
+	struct sctp_chunkhdr *hdr;
 
 	pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk);
 
@@ -4267,7 +4270,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 	 * Since we don't know the chunk type, we use a general
 	 * chunkhdr structure to make a comparison.
 	 */
-	if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(unk_chunk, sizeof(*hdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -4340,7 +4343,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 	 * Since we don't know the chunk type, we use a general
 	 * chunkhdr structure to make a comparison.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -4405,7 +4408,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
 	struct sctp_chunk *chunk = arg;
 
 	/* Make sure that the chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -6121,9 +6124,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 		switch (chunk->chunk_hdr->type) {
 		case SCTP_CID_INIT:
 		{
-			sctp_init_chunk_t *init;
+			struct sctp_init_chunk *init;
 
-			init = (sctp_init_chunk_t *)chunk->chunk_hdr;
+			init = (struct sctp_init_chunk *)chunk->chunk_hdr;
 			vtag = ntohl(init->init_hdr.init_tag);
 			break;
 		}
@@ -6196,7 +6199,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
 			 sctp_cmd_seq_t *commands)
 {
-	sctp_datahdr_t *data_hdr;
+	struct sctp_datahdr *data_hdr;
 	struct sctp_chunk *err;
 	size_t datalen;
 	sctp_verb_t deliver;
@@ -6209,8 +6212,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	u16 sid;
 	u8 ordered = 0;
 
-	data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
+	data_hdr = (struct sctp_datahdr *)chunk->skb->data;
+	chunk->subh.data_hdr = data_hdr;
+	skb_pull(chunk->skb, sizeof(*data_hdr));
 
 	tsn = ntohl(data_hdr->tsn);
 	pr_debug("%s: TSN 0x%x\n", __func__, tsn);
@@ -6258,7 +6262,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * Actually, allow a little bit of overflow (up to a MTU).
 	 */
 	datalen = ntohs(chunk->chunk_hdr->length);
-	datalen -= sizeof(sctp_data_chunk_t);
+	datalen -= sizeof(struct sctp_data_chunk);
 
 	deliver = SCTP_CMD_CHUNK_ULP;
 
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 419b18ebb056..3e958c1c4b95 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -53,7 +53,7 @@ static const sctp_sm_table_entry_t
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
 static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    sctp_cid_t cid,
+							    enum sctp_cid cid,
 							    sctp_state_t state);
 
 
@@ -968,7 +968,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 };
 
 static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    sctp_cid_t cid,
+							    enum sctp_cid cid,
 							    sctp_state_t state)
 {
 	if (state > SCTP_STATE_MAX)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7b6e20eb9451..1db478e34520 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+	refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
 	sk->sk_wmem_queued += chunk->skb->truesize;
 	sk_mem_charge(sk, chunk->skb->truesize);
 }
@@ -4933,11 +4933,47 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 }
 EXPORT_SYMBOL(sctp_do_peeloff);
 
+static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *peeloff,
+					  struct file **newfile, unsigned flags)
+{
+	struct socket *newsock;
+	int retval;
+
+	retval = sctp_do_peeloff(sk, peeloff->associd, &newsock);
+	if (retval < 0)
+		goto out;
+
+	/* Map the socket to an unused fd that can be returned to the user.  */
+	retval = get_unused_fd_flags(flags & SOCK_CLOEXEC);
+	if (retval < 0) {
+		sock_release(newsock);
+		goto out;
+	}
+
+	*newfile = sock_alloc_file(newsock, 0, NULL);
+	if (IS_ERR(*newfile)) {
+		put_unused_fd(retval);
+		sock_release(newsock);
+		retval = PTR_ERR(*newfile);
+		*newfile = NULL;
+		return retval;
+	}
+
+	pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
+		 retval);
+
+	peeloff->sd = retval;
+
+	if (flags & SOCK_NONBLOCK)
+		(*newfile)->f_flags |= O_NONBLOCK;
+out:
+	return retval;
+}
+
 static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen)
 {
 	sctp_peeloff_arg_t peeloff;
-	struct socket *newsock;
-	struct file *newfile;
+	struct file *newfile = NULL;
 	int retval = 0;
 
 	if (len < sizeof(sctp_peeloff_arg_t))
@@ -4946,26 +4982,44 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
 	if (copy_from_user(&peeloff, optval, len))
 		return -EFAULT;
 
-	retval = sctp_do_peeloff(sk, peeloff.associd, &newsock);
+	retval = sctp_getsockopt_peeloff_common(sk, &peeloff, &newfile, 0);
 	if (retval < 0)
 		goto out;
 
-	/* Map the socket to an unused fd that can be returned to the user.  */
-	retval = get_unused_fd_flags(0);
-	if (retval < 0) {
-		sock_release(newsock);
-		goto out;
+	/* Return the fd mapped to the new socket.  */
+	if (put_user(len, optlen)) {
+		fput(newfile);
+		put_unused_fd(retval);
+		return -EFAULT;
 	}
 
-	newfile = sock_alloc_file(newsock, 0, NULL);
-	if (IS_ERR(newfile)) {
+	if (copy_to_user(optval, &peeloff, len)) {
+		fput(newfile);
 		put_unused_fd(retval);
-		sock_release(newsock);
-		return PTR_ERR(newfile);
+		return -EFAULT;
 	}
+	fd_install(retval, newfile);
+out:
+	return retval;
+}
 
-	pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
-		 retval);
+static int sctp_getsockopt_peeloff_flags(struct sock *sk, int len,
+					 char __user *optval, int __user *optlen)
+{
+	sctp_peeloff_flags_arg_t peeloff;
+	struct file *newfile = NULL;
+	int retval = 0;
+
+	if (len < sizeof(sctp_peeloff_flags_arg_t))
+		return -EINVAL;
+	len = sizeof(sctp_peeloff_flags_arg_t);
+	if (copy_from_user(&peeloff, optval, len))
+		return -EFAULT;
+
+	retval = sctp_getsockopt_peeloff_common(sk, &peeloff.p_arg,
+						&newfile, peeloff.flags);
+	if (retval < 0)
+		goto out;
 
 	/* Return the fd mapped to the new socket.  */
 	if (put_user(len, optlen)) {
@@ -4973,7 +5027,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
 		put_unused_fd(retval);
 		return -EFAULT;
 	}
-	peeloff.sd = retval;
+
 	if (copy_to_user(optval, &peeloff, len)) {
 		fput(newfile);
 		put_unused_fd(retval);
@@ -6033,7 +6087,8 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
 		return -EACCES;
 
 	hmacs = ep->auth_hmacs_list;
-	data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	data_len = ntohs(hmacs->param_hdr.length) -
+		   sizeof(struct sctp_paramhdr);
 
 	if (len < sizeof(struct sctp_hmacalgo) + data_len)
 		return -EINVAL;
@@ -6117,7 +6172,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 		goto num;
 
 	/* See if the user provided enough room for all the data */
-	num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
 	if (len < num_chunks)
 		return -EINVAL;
 
@@ -6165,7 +6220,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	if (!ch)
 		goto num;
 
-	num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+	num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
 	if (len < sizeof(struct sctp_authchunks) + num_chunks)
 		return -EINVAL;
 
@@ -6758,6 +6813,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_SOCKOPT_PEELOFF:
 		retval = sctp_getsockopt_peeloff(sk, len, optval, optlen);
 		break;
+	case SCTP_SOCKOPT_PEELOFF_FLAGS:
+		retval = sctp_getsockopt_peeloff_flags(sk, len, optval, optlen);
+		break;
 	case SCTP_PEER_ADDR_PARAMS:
 		retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
 							  optlen);
@@ -7563,7 +7621,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 		if (flags & MSG_PEEK) {
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb)
-				atomic_inc(&skb->users);
+				refcount_inc(&skb->users);
 		} else {
 			skb = __skb_dequeue(&sk->sk_receive_queue);
 		}
@@ -7684,7 +7742,7 @@ static void sctp_wfree(struct sk_buff *skb)
 				sizeof(struct sk_buff) +
 				sizeof(struct sctp_chunk);
 
-	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+	WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc));
 
 	/*
 	 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 82e6d40052a8..63ea15503714 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -304,7 +304,7 @@ out:
 	return retval;
 }
 
-static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
+static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
 			struct sctp_association *asoc, __u32 resp_seq,
 			__be16 type)
 {
@@ -749,7 +749,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 	struct sctp_strreset_resp *resp = param.v;
 	struct sctp_transport *t;
 	__u16 i, nums, flags = 0;
-	sctp_paramhdr_t *req;
+	struct sctp_paramhdr *req;
 	__u32 result;
 
 	req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 17854fb0e512..5f86c5062a98 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -158,7 +158,7 @@ struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
 		/* Trim the buffer to the right length.  */
 		skb_trim(skb, sizeof(struct sctp_assoc_change) +
 			 ntohs(chunk->chunk_hdr->length) -
-			 sizeof(sctp_chunkhdr_t));
+			 sizeof(struct sctp_chunkhdr));
 	} else {
 		event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
 				  MSG_NOTIFICATION, gfp);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 25f7e4140566..0225d62a869f 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1090,7 +1090,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 
 	if (chunk) {
 		needed = ntohs(chunk->chunk_hdr->length);
-		needed -= sizeof(sctp_data_chunk_t);
+		needed -= sizeof(struct sctp_data_chunk);
 	} else
 		needed = SCTP_DEFAULT_MAXWINDOW;
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1b92b72e812f..101e3597338f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2313,7 +2313,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk)
 	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
 
 	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
-		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1a0c961f4ffe..b9ee766054f6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(unix_peer_get);
 
 static inline void unix_release_addr(struct unix_address *addr)
 {
-	if (atomic_dec_and_test(&addr->refcnt))
+	if (refcount_dec_and_test(&addr->refcnt))
 		kfree(addr);
 }
 
@@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 static int unix_writable(const struct sock *sk)
 {
 	return sk->sk_state != TCP_LISTEN &&
-	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 }
 
 static void unix_write_space(struct sock *sk)
@@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk)
 
 	skb_queue_purge(&sk->sk_receive_queue);
 
-	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 	WARN_ON(!sk_unhashed(sk));
 	WARN_ON(sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
@@ -864,7 +864,7 @@ static int unix_autobind(struct socket *sock)
 		goto out;
 
 	addr->name->sun_family = AF_UNIX;
-	atomic_set(&addr->refcnt, 1);
+	refcount_set(&addr->refcnt, 1);
 
 retry:
 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
@@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	memcpy(addr->name, sunaddr, addr_len);
 	addr->len = addr_len;
 	addr->hash = hash ^ sk->sk_type;
-	atomic_set(&addr->refcnt, 1);
+	refcount_set(&addr->refcnt, 1);
 
 	if (sun_path[0]) {
 		addr->hash = UNIX_HASH_SIZE;
@@ -1335,7 +1335,7 @@ restart:
 
 	/* copy address information from listening to new sock*/
 	if (otheru->addr) {
-		atomic_inc(&otheru->addr->refcnt);
+		refcount_inc(&otheru->addr->refcnt);
 		newu->addr = otheru->addr;
 	}
 	if (otheru->path.dentry) {
@@ -2033,7 +2033,7 @@ alloc_skb:
 	skb->len += size;
 	skb->data_len += size;
 	skb->truesize += size;
-	atomic_add(size, &sk->sk_wmem_alloc);
+	refcount_add(size, &sk->sk_wmem_alloc);
 
 	if (newskb) {
 		err = unix_scm_to_skb(&scm, skb, false);
@@ -2847,7 +2847,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
 			s,
-			atomic_read(&s->sk_refcnt),
+			refcount_read(&s->sk_refcnt),
 			0,
 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
 			s->sk_type,
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index abf81b329dc1..55b2ac300995 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,8 +4,7 @@
 
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
 		      xfrm_input.o xfrm_output.o \
-		      xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+		      xfrm_sysctl.o xfrm_replay.o xfrm_device.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6d4a60d1bf19..5f7e8bfa0c2d 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -22,6 +22,7 @@
 #include <net/xfrm.h>
 #include <linux/notifier.h>
 
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	int err;
@@ -137,6 +138,7 @@ ok:
 	return true;
 }
 EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
 
 static int xfrm_dev_register(struct net_device *dev)
 {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a3dc7ab0b7ed..4706df612170 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 		err = -ESRCH;
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
-	if (cnt)
-		xfrm_garbage_collect(net);
-
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6197c7231bc7..2be4c6af008a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			return 0;
 		return err;
 	}
+	xfrm_garbage_collect(net);
 
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index e7ec9b8539a5..9c650589e80f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist
 hostprogs-y += xdp_tx_iptunnel
 hostprogs-y += test_map_in_map
 hostprogs-y += per_socket_stats_example
+hostprogs-y += load_sock_ops
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
 tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
 tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
 tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
+load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
 test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
 trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
 lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
@@ -111,6 +113,12 @@ always += lwt_len_hist_kern.o
 always += xdp_tx_iptunnel_kern.o
 always += test_map_in_map_kern.o
 always += cookie_uid_helper_example.o
+always += tcp_synrto_kern.o
+always += tcp_rwnd_kern.o
+always += tcp_bufs_kern.o
+always += tcp_cong_kern.o
+always += tcp_iw_kern.o
+always += tcp_clamp_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -130,6 +138,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
 HOSTLOADLIBES_tracex6 += -lelf
 HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
+HOSTLOADLIBES_load_sock_ops += -lelf
 HOSTLOADLIBES_test_probe_write_user += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f4840b8bb8f9..d50ac342dc92 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -60,6 +60,9 @@ static unsigned long long (*bpf_get_prandom_u32)(void) =
 	(void *) BPF_FUNC_get_prandom_u32;
 static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
 	(void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+			     int optlen) =
+	(void *) BPF_FUNC_setsockopt;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index a91c57dd8571..a4be7cfa6519 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
 	bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
 	bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
+	bool is_sockops = strncmp(event, "sockops", 7) == 0;
 	size_t insns_cnt = size / sizeof(struct bpf_insn);
 	enum bpf_prog_type prog_type;
 	char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_CGROUP_SKB;
 	} else if (is_cgroup_sk) {
 		prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	} else if (is_sockops) {
+		prog_type = BPF_PROG_TYPE_SOCK_OPS;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
 		return 0;
 
-	if (is_socket) {
-		event += 6;
+	if (is_socket || is_sockops) {
+		if (is_socket)
+			event += 6;
+		else
+			event += 7;
 		if (*event != '/')
 			return 0;
 		event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		    memcmp(shname, "xdp", 3) == 0 ||
 		    memcmp(shname, "perf_event", 10) == 0 ||
 		    memcmp(shname, "socket", 6) == 0 ||
-		    memcmp(shname, "cgroup/", 7) == 0)
+		    memcmp(shname, "cgroup/", 7) == 0 ||
+		    memcmp(shname, "sockops", 7) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
 
diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c
new file mode 100644
index 000000000000..e5da6cf71a3e
--- /dev/null
+++ b/samples/bpf/load_sock_ops.c
@@ -0,0 +1,97 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+
+static void usage(char *pname)
+{
+	printf("USAGE:\n  %s [-l] <cg-path> <prog filename>\n", pname);
+	printf("\tLoad and attach a sock_ops program to the specified "
+	       "cgroup\n");
+	printf("\tIf \"-l\" is used, the program will continue to run\n");
+	printf("\tprinting the BPF log buffer\n");
+	printf("\tIf the specified filename does not end in \".o\", it\n");
+	printf("\tappends \"_kern.o\" to the name\n");
+	printf("\n");
+	printf("  %s -r <cg-path>\n", pname);
+	printf("\tDetaches the currently attached sock_ops program\n");
+	printf("\tfrom the specified cgroup\n");
+	printf("\n");
+	exit(1);
+}
+
+int main(int argc, char **argv)
+{
+	int logFlag = 0;
+	int error = 0;
+	char *cg_path;
+	char fn[500];
+	char *prog;
+	int cg_fd;
+
+	if (argc < 3)
+		usage(argv[0]);
+
+	if (!strcmp(argv[1], "-r")) {
+		cg_path = argv[2];
+		cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
+		error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+		if (error) {
+			printf("ERROR: bpf_prog_detach: %d (%s)\n",
+			       error, strerror(errno));
+			return 2;
+		}
+		return 0;
+	} else if (!strcmp(argv[1], "-h")) {
+		usage(argv[0]);
+	} else if (!strcmp(argv[1], "-l")) {
+		logFlag = 1;
+		if (argc < 4)
+			usage(argv[0]);
+	}
+
+	prog = argv[argc - 1];
+	cg_path = argv[argc - 2];
+	if (strlen(prog) > 480) {
+		fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
+		return 3;
+	}
+	cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
+
+	if (!strcmp(prog + strlen(prog)-2, ".o"))
+		strcpy(fn, prog);
+	else
+		sprintf(fn, "%s_kern.o", prog);
+	if (logFlag)
+		printf("loading bpf file:%s\n", fn);
+	if (load_bpf_file(fn)) {
+		printf("ERROR: load_bpf_file failed for: %s\n", fn);
+		printf("%s", bpf_log_buf);
+		return 4;
+	}
+	if (logFlag)
+		printf("TCP BPF Loaded %s\n", fn);
+
+	error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (error) {
+		printf("ERROR: bpf_prog_attach: %d (%s)\n",
+		       error, strerror(errno));
+		return 5;
+	} else if (logFlag) {
+		read_trace_pipe();
+	}
+
+	return error;
+}
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index b5524d417eb5..877ecf8fc5ac 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -8,6 +8,10 @@
 #include <arpa/inet.h>
 #include <sys/resource.h>
 
+#define PARSE_IP 3
+#define PARSE_IP_PROG_FD (prog_fd[0])
+#define PROG_ARRAY_FD (map_fd[0])
+
 struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
@@ -28,7 +32,9 @@ int main(int argc, char **argv)
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	char filename[256];
 	FILE *f;
-	int i, sock;
+	int i, sock, err, id, key = PARSE_IP;
+	struct bpf_prog_info info = {};
+	uint32_t info_len = sizeof(info);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	setrlimit(RLIMIT_MEMLOCK, &r);
@@ -38,6 +44,13 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	/* Test fd array lookup which returns the id of the bpf_prog */
+	err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
+	assert(!err);
+	err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
+	assert(!err);
+	assert(id == info.id);
+
 	sock = open_raw_sock("lo");
 
 	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
new file mode 100644
index 000000000000..ee83bbabd17c
--- /dev/null
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets and send
+ * and receive buffers to 1.5MB. This would usually be done after
+ * doing appropriate checks that indicate the hosts are far enough
+ * away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_bufs(struct bpf_sock_ops *skops)
+{
+	int bufsize = 1500000;
+	int rwnd_init = 40;
+	int rv = 0;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 &&
+	    skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+
+	/* Usually there would be a check to insure the hosts are far
+	 * from each other so it makes sense to increase buffer sizes
+	 */
+	switch (op) {
+	case BPF_SOCK_OPS_RWND_INIT:
+		rv = rwnd_init;
+		break;
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		/* Set sndbuf and rcvbuf of active connections */
+		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+				    sizeof(bufsize));
+		rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+					     &bufsize, sizeof(bufsize));
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		/* Nothing to do */
+		break;
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		/* Set sndbuf and rcvbuf of passive connections */
+		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+				    sizeof(bufsize));
+		rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+					     &bufsize, sizeof(bufsize));
+		break;
+	default:
+		rv = -1;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
new file mode 100644
index 000000000000..d68eadd9ca2d
--- /dev/null
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -0,0 +1,102 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp
+ * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within
+ * the same datacenter. For his example, we assume they are within the same
+ * datacenter when the first 5.5 bytes of their IPv6 addresses are the same.
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_clamp(struct bpf_sock_ops *skops)
+{
+	int bufsize = 150000;
+	int to_init = 10;
+	int clamp = 100;
+	int rv = 0;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Check that both hosts are within same datacenter. For this example
+	 * it is the case when the first 5.5 bytes of their IPv6 addresses are
+	 * the same.
+	 */
+	if (skops->family == AF_INET6 &&
+	    skops->local_ip6[0] == skops->remote_ip6[0] &&
+	    (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+	    (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+		switch (op) {
+		case BPF_SOCK_OPS_TIMEOUT_INIT:
+			rv = to_init;
+			break;
+		case BPF_SOCK_OPS_TCP_CONNECT_CB:
+			/* Set sndbuf and rcvbuf of active connections */
+			rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
+					    &bufsize, sizeof(bufsize));
+			rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+						      SO_RCVBUF, &bufsize,
+						      sizeof(bufsize));
+			break;
+		case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+			rv = bpf_setsockopt(skops, SOL_TCP,
+					    TCP_BPF_SNDCWND_CLAMP,
+					    &clamp, sizeof(clamp));
+			break;
+		case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+			/* Set sndbuf and rcvbuf of passive connections */
+			rv = bpf_setsockopt(skops, SOL_TCP,
+					    TCP_BPF_SNDCWND_CLAMP,
+					    &clamp, sizeof(clamp));
+			rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+						      SO_SNDBUF, &bufsize,
+						      sizeof(bufsize));
+			rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+						      SO_RCVBUF, &bufsize,
+						      sizeof(bufsize));
+			break;
+		default:
+			rv = -1;
+		}
+	} else {
+		rv = -1;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
new file mode 100644
index 000000000000..dac15bce1fa9
--- /dev/null
+++ b/samples/bpf/tcp_cong_kern.c
@@ -0,0 +1,83 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set congestion control to dctcp when both hosts are
+ * in the same datacenter (as deteremined by IPv6 prefix).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_cong(struct bpf_sock_ops *skops)
+{
+	char cong[] = "dctcp";
+	int rv = 0;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 &&
+	    skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Check if both hosts are in the same datacenter. For this
+	 * example they are if the 1st 5.5 bytes in the IPv6 address
+	 * are the same.
+	 */
+	if (skops->family == AF_INET6 &&
+	    skops->local_ip6[0] == skops->remote_ip6[0] &&
+	    (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+	    (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+		switch (op) {
+		case BPF_SOCK_OPS_NEEDS_ECN:
+			rv = 1;
+			break;
+		case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+			rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
+					    cong, sizeof(cong));
+			break;
+		case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+			rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
+					    cong, sizeof(cong));
+			break;
+		default:
+			rv = -1;
+		}
+	} else {
+		rv = -1;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
new file mode 100644
index 000000000000..23c5122ef819
--- /dev/null
+++ b/samples/bpf/tcp_iw_kern.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial congestion window and initial receive
+ * window to 40 packets and send and receive buffers to 1.5MB. This
+ * would usually be done after doing appropriate checks that indicate
+ * the hosts are far enough away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_iw(struct bpf_sock_ops *skops)
+{
+	int bufsize = 1500000;
+	int rwnd_init = 40;
+	int iw = 40;
+	int rv = 0;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 &&
+	    skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Usually there would be a check to insure the hosts are far
+	 * from each other so it makes sense to increase buffer sizes
+	 */
+	switch (op) {
+	case BPF_SOCK_OPS_RWND_INIT:
+		rv = rwnd_init;
+		break;
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		/* Set sndbuf and rcvbuf of active connections */
+		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+				    sizeof(bufsize));
+		rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+					     &bufsize, sizeof(bufsize));
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw,
+				    sizeof(iw));
+		break;
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		/* Set sndbuf and rcvbuf of passive connections */
+		rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+				    sizeof(bufsize));
+		rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+					     &bufsize, sizeof(bufsize));
+		break;
+	default:
+		rv = -1;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
new file mode 100644
index 000000000000..3f2a228f81ce
--- /dev/null
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets when using IPv6
+ * and the first 5.5 bytes of the IPv6 addresses are not the same (in this
+ * example that means both hosts are not the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_rwnd(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) !=
+	    55601 && skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Check for RWND_INIT operation and IPv6 addresses */
+	if (op == BPF_SOCK_OPS_RWND_INIT &&
+		skops->family == AF_INET6) {
+
+		/* If the first 5.5 bytes of the IPv6 address are not the same
+		 * then both hosts are not in the same datacenter
+		 * so use a larger initial advertized window (40 packets)
+		 */
+		if (skops->local_ip6[0] != skops->remote_ip6[0] ||
+		    (bpf_ntohl(skops->local_ip6[1]) & 0xfffff000) !=
+		    (bpf_ntohl(skops->remote_ip6[1]) & 0xfffff000))
+			rv = 40;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
new file mode 100644
index 000000000000..3c3fc83d81cb
--- /dev/null
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses
+ * and the first 5.5 bytes of the IPv6 addresses are the same (in this example
+ * that means both hosts are in the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sockops")
+int bpf_synrto(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int op;
+
+	/* For testing purposes, only execute rest of BPF program
+	 * if neither port numberis 55601
+	 */
+	if (bpf_ntohl(skops->remote_port) != 55601 &&
+	    skops->local_port != 55601)
+		return -1;
+
+	op = (int) skops->op;
+
+#ifdef DEBUG
+	bpf_printk("BPF command: %d\n", op);
+#endif
+
+	/* Check for TIMEOUT_INIT operation and IPv6 addresses */
+	if (op == BPF_SOCK_OPS_TIMEOUT_INIT &&
+		skops->family == AF_INET6) {
+
+		/* If the first 5.5 bytes of the IPv6 address are the same
+		 * then both hosts are in the same datacenter
+		 * so use an RTO of 10ms
+		 */
+		if (skops->local_ip6[0] == skops->remote_ip6[0] &&
+		    (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+		    (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
+			rv = 10;
+	}
+#ifdef DEBUG
+	bpf_printk("Returning %d\n", rv);
+#endif
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index f62fdc2bd428..1aca18539d8d 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -32,6 +32,20 @@ static const char * const test_names[] = {
 
 #define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
 
+static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
+{
+	struct bpf_map_info info = {};
+	uint32_t info_len = sizeof(info);
+	int ret, id;
+
+	ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len);
+	assert(!ret);
+
+	ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id);
+	assert(!ret);
+	assert(id == info.id);
+}
+
 static void populate_map(uint32_t port_key, int magic_result)
 {
 	int ret;
@@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result)
 
 	ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY);
 	assert(!ret);
+	check_map_id(PORT_A, A_OF_PORT_A, port_key);
 
 	ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST);
 	assert(!ret);
+	check_map_id(PORT_A, H_OF_PORT_A, port_key);
 
 	ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST);
 	assert(!ret);
+	check_map_id(PORT_H, H_OF_PORT_H, port_key);
 }
 
 static void test_map_in_map(void)
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index ce753a408c56..c583a1e1bd3c 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -14,7 +14,15 @@ __headers:
 include scripts/Kbuild.include
 
 srcdir        := $(srctree)/$(obj)
-subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular
+# files. So, we are using a combination of sort/dir/wildcard which works
+# with fakechroot.
+subdirs       := $(patsubst $(srcdir)/%/,%,\
+		 $(filter-out $(srcdir)/,\
+		 $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
 # caller may set destination dir (when installing to asm/)
 _dst          := $(if $(dst),$(dst),$(obj))
 
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 3bffdcaaa274..b724a0290c75 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start,
 int yylex(void);
 int yyparse(void);
 
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
 
 /*----------------------------------------------------------------------*/
 #define xmalloc(size) ({ void *__ptr = malloc(size);		\
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 90a091b6ae4d..eb8144643b78 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -196,7 +196,7 @@ clean-files     += config.pot linux.pot
 
 # Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
 PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
 $(obj)/dochecklxdialog:
 	$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
 
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index a9bc5334a478..003114779815 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS];
 static int items_num;
 static int global_exit;
 /* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
@@ -305,7 +305,7 @@ struct function_keys {
 };
 
 static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
 	{
 		.key_str = "F1",
 		.func = "Help",
@@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag)
 	index = (index + items_num) % items_num;
 	while (true) {
 		char *str = k_menu_items[index].str;
-		if (strcasestr(str, match_str) != 0)
+		if (strcasestr(str, match_str) != NULL)
 			return index;
 		if (flag == FIND_NEXT_MATCH_UP ||
 		    flag == MATCH_TINKER_PATTERN_UP)
@@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans)
 
 static void conf(struct menu *menu)
 {
-	struct menu *submenu = 0;
+	struct menu *submenu = NULL;
 	const char *prompt = menu_get_prompt(menu);
 	struct symbol *sym;
 	int res;
@@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu)
 static void conf_choice(struct menu *menu)
 {
 	const char *prompt = _(menu_get_prompt(menu));
-	struct menu *child = 0;
+	struct menu *child = NULL;
 	struct symbol *active;
 	int selected_index = 0;
 	int last_top_row = 0;
@@ -1456,7 +1456,7 @@ static void conf_save(void)
 	}
 }
 
-void setup_windows(void)
+static void setup_windows(void)
 {
 	int lines, columns;
 
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c
index 4b2f44c20caf..a64b1c31253e 100644
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -129,7 +129,7 @@ static void no_colors_theme(void)
 	mkattrn(FUNCTION_TEXT, A_REVERSE);
 }
 
-void set_colors()
+void set_colors(void)
 {
 	start_color();
 	use_default_colors();
@@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no)
 	int lines = 0;
 
 	if (!text)
-		return 0;
+		return NULL;
 
 	for (i = 0; text[i] != '\0' && lines < line_no; i++)
 		if (text[i] == '\n')
diff --git a/scripts/tags.sh b/scripts/tags.sh
index d661f2f3ef61..d23dcbf17457 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -106,6 +106,7 @@ all_compiled_sources()
 		case "$i" in
 			*.[cS])
 				j=${i/\.[cS]/\.o}
+				j="${j#$tree}"
 				if [ -e $j ]; then
 					echo $i
 				fi
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 5088d4b8db22..009e6c98754e 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol,
 	struct snd_pcm_substream *substream;
 	const struct snd_pcm_chmap_elem *map;
 
-	if (snd_BUG_ON(!info->chmap))
+	if (!info->chmap)
 		return -EINVAL;
 	substream = snd_pcm_chmap_substream(info, idx);
 	if (!substream)
@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 	unsigned int __user *dst;
 	int c, count = 0;
 
-	if (snd_BUG_ON(!info->chmap))
+	if (!info->chmap)
 		return -EINVAL;
 	if (size < 8)
 		return -ENOMEM;
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 9e6f54f8c45d..1e26854b3425 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
 		cycle = increment_cycle_count(cycle, 1);
 		if (s->handle_packet(s, 0, cycle, i) < 0) {
 			s->packet_index = -1;
-			amdtp_stream_pcm_abort(s);
+			if (in_interrupt())
+				amdtp_stream_pcm_abort(s);
+			WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 			return;
 		}
 	}
@@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
 	/* Queueing error or detecting invalid payload. */
 	if (i < packets) {
 		s->packet_index = -1;
-		amdtp_stream_pcm_abort(s);
+		if (in_interrupt())
+			amdtp_stream_pcm_abort(s);
+		WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 		return;
 	}
 
diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h
index 7e8831722821..ea1a91e99875 100644
--- a/sound/firewire/amdtp-stream.h
+++ b/sound/firewire/amdtp-stream.h
@@ -135,7 +135,7 @@ struct amdtp_stream {
 	/* For a PCM substream processing. */
 	struct snd_pcm_substream *pcm;
 	struct tasklet_struct period_tasklet;
-	unsigned int pcm_buffer_pointer;
+	snd_pcm_uframes_t pcm_buffer_pointer;
 	unsigned int pcm_period_pointer;
 
 	/* To wait for first packet. */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1770f085c2a6..01eb1dc7b5b3 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -370,10 +370,12 @@ enum {
 #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
 #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
 #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-			IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)	|| \
-			IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+			  IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+			  IS_KBL_H(pci)	|| IS_GLK(pci) || IS_CFL(pci))
 
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
@@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Kabylake-H */
 	{ PCI_DEVICE(0x8086, 0xa2f0),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+	/* Coffelake */
+	{ PCI_DEVICE(0x8086, 0xa348),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE},
 	/* Broxton-P(Apollolake) */
 	{ PCI_DEVICE(0x8086, 0x5a98),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f94b48b168dc..ce2988be4f0e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_IN,
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
+	BPF_PROG_TYPE_SOCK_OPS,
 };
 
 enum bpf_attach_type {
 	BPF_CGROUP_INET_INGRESS,
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
+	BPF_CGROUP_SOCK_OPS,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -518,6 +520,25 @@ union bpf_attr {
  *     Set full skb->hash.
  *     @skb: pointer to skb
  *     @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ *     Calls setsockopt. Not all opts are available, only those with
+ *     integer optvals plus TCP_CONGESTION.
+ *     Supported levels: SOL_SOCKET and IPROTO_TCP
+ *     @bpf_socket: pointer to bpf_socket
+ *     @level: SOL_SOCKET or IPROTO_TCP
+ *     @optname: option name
+ *     @optval: pointer to option value
+ *     @optlen: length of optval in byes
+ *     Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ *     Grow or shrink room in sk_buff.
+ *     @skb: pointer to skb
+ *     @len_diff: (signed) amount of room to grow/shrink
+ *     @mode: operation mode (enum bpf_adj_room_mode)
+ *     @flags: reserved for future use
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -568,7 +589,9 @@ union bpf_attr {
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
 	FN(get_socket_uid),		\
-	FN(set_hash),
+	FN(set_hash),			\
+	FN(setsockopt),			\
+	FN(skb_adjust_room),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -618,6 +641,11 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+	BPF_ADJ_ROOM_NET_OPTS,
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -720,4 +748,54 @@ struct bpf_map_info {
 	__u32 map_flags;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+	__u32 op;
+	union {
+		__u32 reply;
+		__u32 replylong[4];
+	};
+	__u32 family;
+	__u32 remote_ip4;
+	__u32 local_ip4;
+	__u32 remote_ip6[4];
+	__u32 local_ip6[4];
+	__u32 remote_port;
+	__u32 local_port;
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+	BPF_SOCK_OPS_VOID,
+	BPF_SOCK_OPS_TIMEOUT_INIT,	/* Should return SYN-RTO value to use or
+					 * -1 if default value should be used
+					 */
+	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertized
+					 * window (in packets) or -1 if default
+					 * value should be used
+					 */
+	BPF_SOCK_OPS_TCP_CONNECT_CB,	/* Calls BPF program right before an
+					 * active connection is initialized
+					 */
+	BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB,	/* Calls BPF program when an
+						 * active connection is
+						 * established
+						 */
+	BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,	/* Calls BPF program when a
+						 * passive connection is
+						 * established
+						 */
+	BPF_SOCK_OPS_NEEDS_ECN,		/* If connection's congestion control
+					 * needs ECN
+					 */
+};
+
+#define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 84e7e698411e..a2670e9d652d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
 					   struct map *map, unsigned long offs)
 {
 	struct symbol *sym;
-	u64 addr = tp->address + tp->offset - offs;
+	u64 addr = tp->address - offs;
 
 	sym = map__find_symbol(map, addr);
 	if (!sym)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c0af0195432f..404aec520812 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2658,6 +2658,171 @@ static struct bpf_test tests[] = {
 		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
+		"direct packet access: test18 (imm += pkt_ptr, 1)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 8),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test19 (imm += pkt_ptr, 2)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			BPF_MOV64_IMM(BPF_REG_4, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test20 (x += pkt_ptr, 1)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"direct packet access: test21 (x += pkt_ptr, 2)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"direct packet access: test22 (x += pkt_ptr, 3)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"direct packet access: test23 (x += pkt_ptr, 4)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 31),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet",
+	},
+	{
+		"direct packet access: test24 (x += pkt_ptr, 5)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 64),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -3767,6 +3932,72 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid bpf_context access",
 	},
 	{
+		"leak pointer into ctx 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 2 },
+		.errstr_unpriv = "R2 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into ctx 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R10 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into ctx 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.errstr_unpriv = "R2 leaks addr into ctx",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into map val",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr_unpriv = "R6 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
 		"helper access to map: full range",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),